diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..53b4768 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 Markus Fritsche + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 4c70367..ade8b75 100644 --- a/README.md +++ b/README.md @@ -111,7 +111,7 @@ Replace these with your own llama.cpp endpoints if you're not on that LAN. ## License -Not yet selected. Default-private until decided. +MIT — see [`LICENSE`](LICENSE). ## Project conventions diff --git a/bin/aish b/bin/aish new file mode 100755 index 0000000..48cd7ca --- /dev/null +++ b/bin/aish @@ -0,0 +1,23 @@ +#!/bin/sh +# aish — AI-augmented conversational shell launcher. +# Source of truth: git.reauktion.de/marfrit/aish +# +# Installed by the aish package at /usr/bin/aish; execs LuaJIT against +# the packaged main.lua under $AISH_LIB (default /usr/share/lua/5.1/aish). +# +# Dev mode: AISH_LIB=$HOME/src/aish aish ... 
+ +AISH_LIB="${AISH_LIB:-/usr/share/lua/5.1/aish}" + +if [ ! -r "$AISH_LIB/main.lua" ]; then + echo "aish: $AISH_LIB/main.lua not found." >&2 + echo "aish: set AISH_LIB to the directory containing main.lua." >&2 + exit 2 +fi + +if ! command -v luajit >/dev/null 2>&1; then + echo "aish: luajit not found in PATH. Install luajit." >&2 + exit 2 +fi + +exec luajit "$AISH_LIB/main.lua" "$@" diff --git a/examples/config.lua b/examples/config.lua new file mode 100644 index 0000000..bac5084 --- /dev/null +++ b/examples/config.lua @@ -0,0 +1,488 @@ +-- config.lua — example aish configuration. +-- Shipped by the aish package at /usr/share/doc/aish/examples/config.lua. +-- Copy to ~/.config/aish/config.lua (preferred) and adapt to your fleet: +-- +-- install -Dm600 /usr/share/doc/aish/examples/config.lua \ +-- ~/.config/aish/config.lua +-- +-- Mode 0600 matters because this file can carry MCP bearer tokens. The +-- example entries in the mcp.servers block below are commented out and +-- reference tokens only by env-var name — prefer that auth_env form (export +-- MCP_PVE1_TOKEN=... in your shell init) over committing literals. +-- +-- Loaded with dofile() at startup; returns a plain Lua table. +-- See docs/PHASE0.md §10 for resolution order and full schema. +-- +-- Per issue #12: hossenfelder is the canonical single-URL broker. It does +-- model-aware routing server-side (local models on boltzmann; cloud routes +-- through OpenRouter using its own bearer auth — no client-side key here). +-- Discovery: GET http://hossenfelder.fritz.box:8082/v1/models. +-- +-- Phase 9 (docs/PHASE9.md): a `.aish.lua` in/above your cwd (walking up +-- to $HOME) overlays this user config. First encounter prompts to trust; +-- sha256-pinned in ~/.aish/trusted-projects. Use it for repo-specific +-- model presets, permissions, hooks, etc. +-- +-- IMPORTANT: shallow merge.
If your `.aish.lua` sets a top-level block +-- (models, permissions, cost, shell, ...), it REPLACES the user's +-- entire block — list every entry you want available OR omit the block +-- to keep the user's. Inspect the merge via `:config show` at runtime. + +-- Replace with your own broker URL. This default targets the +-- maintainer's home-LAN broker — useful as a structural example +-- but will not resolve outside that network. +local HOSSENFELDER = "http://hossenfelder.fritz.box:8082" + +return { + default_model = "fast", + + -- 2026-05-17: full fleet exposed. 6 local + 14 cloud models live on the + -- hossenfelder broker. Aliases below match the model IDs returned by + -- /v1/models so the broker can route without prefix stripping. + models = { + -- ── LOCAL ──────────────────────────────────────────────────────── + fast = { -- alias for the 1.5B; default + endpoint = HOSSENFELDER, + model = "qwen2.5-coder-1.5b-q4_k_m.gguf", + temperature = 0.2, + }, + ["coder-3b"] = { -- pve2 (Haswell NUC, 1.8 GB model, ~4 tok/s) + endpoint = HOSSENFELDER, + model = "qwen2.5-coder-3b-instruct-pve2", + temperature = 0.2, + }, + ["coder-7b"] = { -- pve1 (Haswell NUC) + endpoint = HOSSENFELDER, + model = "qwen2.5-coder-7b-instruct-pve1", + temperature = 0.2, + }, + ["coder-7b-snappy"] = { -- dirac:8081, low-latency completion + endpoint = HOSSENFELDER, + model = "qwen-coder-7b-snappy-8k", + temperature = 0.2, + }, + ["qwen-7b"] = { -- dirac:8080 chat + endpoint = HOSSENFELDER, + model = "Qwen2.5-7B-Instruct-Q4_K_M.gguf", + temperature = 0.2, + }, + deep = { -- boltzmann:8085 — Qwen3-30B-A3B MoE, q8 KV cache + endpoint = HOSSENFELDER, + model = "qwen3-30b-a3b-instruct-2507", + -- timeout_ms inherits broker default (30 min) — 30B prompt processing + -- of long contexts on CPU can take 15-25 min before first token. 
+ temperature = 0.1, + }, + + -- ── CLOUD (OpenRouter via hossenfelder) ─────────────────────────── + cloud = { endpoint = HOSSENFELDER, model = "anthropic/claude-haiku-4.5", temperature = 0.2 }, + haiku = { endpoint = HOSSENFELDER, model = "anthropic/claude-haiku-4.5", temperature = 0.2 }, + sonnet = { endpoint = HOSSENFELDER, model = "anthropic/claude-sonnet-4.6", temperature = 0.2 }, + opus = { endpoint = HOSSENFELDER, model = "anthropic/claude-opus-4.7", temperature = 0.2 }, + gpt5 = { endpoint = HOSSENFELDER, model = "openai/gpt-5.5", temperature = 0.2 }, + ["gpt5-mini"] = { endpoint = HOSSENFELDER, model = "openai/gpt-5.4-mini", temperature = 0.2 }, + deepseek = { endpoint = HOSSENFELDER, model = "deepseek/deepseek-v3.2", temperature = 0.2 }, + ["deepseek-v4"] = { endpoint = HOSSENFELDER, model = "deepseek/deepseek-v4-flash", temperature = 0.2 }, + ["deepseek-pro"] = { endpoint = HOSSENFELDER, model = "deepseek/deepseek-v4-pro", temperature = 0.2 }, + mistral = { endpoint = HOSSENFELDER, model = "mistralai/mistral-large-2512", temperature = 0.2 }, + ["qwen-cloud"] = { endpoint = HOSSENFELDER, model = "qwen/qwen3.5-27b", temperature = 0.2 }, + owl = { endpoint = HOSSENFELDER, model = "openrouter/owl-alpha", temperature = 0.2 }, + + -- ── CLOUD FREE-TIER ────────────────────────────────────────────── + ["free-qwen-coder"] = { endpoint = HOSSENFELDER, model = "qwen/qwen3-coder:free", temperature = 0.2 }, + ["free-llama-70b"] = { endpoint = HOSSENFELDER, model = "meta-llama/llama-3.3-70b-instruct:free", temperature = 0.2 }, + ["free-qwen-80b"] = { endpoint = HOSSENFELDER, model = "qwen/qwen3-next-80b-a3b-instruct:free", temperature = 0.2 }, + ["free-gpt-oss"] = { endpoint = HOSSENFELDER, model = "openai/gpt-oss-120b:free", temperature = 0.2 }, + ["free-glm"] = { endpoint = HOSSENFELDER, model = "z-ai/glm-4.5-air:free", temperature = 0.2 }, + ["free-deepseek-v4"] = { endpoint = HOSSENFELDER, model = "deepseek/deepseek-v4-flash:free", temperature = 0.2 }, + }, + + 
shell = { + known_commands = { + "ls", "cat", "cd", "grep", "find", "cp", "mv", "rm", + "mkdir", "rmdir", "git", "make", "cmake", "gcc", "clang", + "python3", "luajit", "ssh", "scp", "curl", "wget", + }, + capture_output = true, -- inject exec output into context + confirm_cmd = true, -- prompt before executing CMD: suggestions + + -- Issue #10: prompt template. When set, replaces the default + -- "[aish:]> " prompt. Variables (substituted via {name}): + -- {model} {ctx_used} {ctx_max} {turn} + -- {cwd} {cwd_short} (cwd with $HOME -> ~) + -- {last_status} (last exec exit code, empty if none yet) + -- {mode} (norris / plan / normal) + -- prompt = "[{model} {ctx_used}/{ctx_max}t T{turn} {mode}] {cwd_short} > ", + }, + + context = { + max_turns = 40, + token_budget = 4096, + }, + + history = { + dir = (os.getenv("HOME") or ".") .. "/.local/share/aish", + }, + + -- Issue #3: pre/post CMD hooks. Optional shell scripts triggered around + -- every CMD: execution. Each hook receives the command on stdin and + -- AISH_CMD / AISH_TURN / AISH_CWD as env vars. Non-zero exit on pre_cmd + -- aborts execution; post_cmd's exit code is ignored but its stdout is + -- logged. Default off (no hooks). Uncomment to enable. + -- hooks = { + -- pre_cmd = (os.getenv("HOME") or ".") .. "/.aish/hooks/pre-cmd", + -- post_cmd = (os.getenv("HOME") or ".") .. "/.aish/hooks/post-cmd", + -- }, + + -- Issue #13: secret redaction. Vault is a separate file at ~/.aish/ + -- secrets.lua (mode 0600 enforced). When set, outbound broker messages + -- are scrubbed: vault literals + autodetect heuristics (OpenAI sk-, + -- OpenRouter sk-or-v1-, GitHub ghp_/gho_/ghs_, AWS AKIA, JWT eyJ..., + -- SSH/GPG PRIVATE KEY headers) become $AISH_SECRET_NNN placeholders. + -- The streamed reply is rehydrated before display so the user sees + -- real values. 
Per-broker override via models[*].redact: + -- "off" -- no scrubbing (trusted local) + -- "vault" -- vault literals only + -- "vault+autodetect" -- + heuristics (default when vault loaded) + -- "stealth" -- + heuristics, opaque decoys, no rehydrate + -- Default per-broker is the global config.secrets.default, falling + -- back to "vault+autodetect" when vault loaded, else "off". + -- secrets = { + -- vault = "~/.aish/secrets.lua", + -- default = "vault+autodetect", -- applies when models[*].redact is nil + -- }, + + -- Issue #8: background CMD (CMD&: marker). Requires history.dir set + -- (logs land at <history.dir>/bg/<id>.log + .status sidecar). The + -- feature is always-on once history.dir exists — no config flag — but + -- only fires when the model emits "CMD&: <command>" or the user runs :bg-spawn. + + -- Issue #9: permission policy DSL for AI-suggested CMD: lines. When set, + -- supersedes shell.confirm_cmd. Patterns are Lua patterns (NOT regex) + -- per substrate invariant §3 (no compiled extensions). Priority order: + -- deny > confirm > allow; first match in the chosen category wins. + -- Unmatched commands default to "confirm". Probe with :perms check <command>. + -- permissions = { + -- allow = { "^ls%s", "^cat%s", "^git status", "^git diff" }, + -- confirm = { "^rm%s", "^git push", "^docker%s", "^sudo%s" }, + -- deny = { "^ssh%s+root@", "^curl%s+http[^s]" }, + -- }, + + -- Phase 2 (docs/PHASE2.md): MCP server registry + tool-call policy. + -- Aliases become the namespace prefix on tool names sent to the model + -- ("<alias>__<tool>" — e.g. "pve1__list_dir"). Separator is "__" because + -- Anthropic via Bedrock validates tool names against ^[a-zA-Z0-9_-]{1,128}$ + -- (dots are rejected). Aliases themselves must not contain "__". + -- auth_token literal > auth_env env-var indirection > nil (no auth). + mcp = { + servers = { + -- Example MCP server entries. Replace the URL with your own + -- lmcp endpoint and source the bearer token via auth_env so + -- it never lands in version control.
+ -- + -- pve1: small sandbox host (stock lmcp tools — shell, read_file, + -- write_file, edit_file, list_dir, search_files, shell_bg). + -- pve1 = { + -- url = "http://pve1.example.local:8080/mcp", + -- auth_env = "MCP_PVE1_TOKEN", + -- }, + -- + -- hertz: home-network hub with lmcp v1.2+ built-in fetch / + -- web_search tools — useful for letting the model do web + -- research without leaving aish. Auto-approving these two + -- is safe because they carry MCP readOnlyHint=true and + -- openWorldHint=true (see auto_approve block below). + -- hertz = { + -- url = "http://hertz.example.local:8080/mcp", + -- auth_env = "MCP_HERTZ_TOKEN", + -- }, + }, + + -- Per-call confirm gate auto-approve policy. fetch / web_search + -- carry MCP readOnlyHint=true + openWorldHint=true; safe to skip + -- the per-call prompt since they neither mutate nor leak local + -- state. Anything writable on the host (mqtt_pub, ha_cli, lxc_exec, + -- wol_and_wait, ...) should keep prompting. + auto_approve = { + -- ["hertz__fetch"] = true, + -- ["hertz__web_search"] = true, + }, + + -- Tool-call sub-loop budget per ask_ai turn. Default 8 if absent. + max_tool_depth = 8, + }, + + -- Phase 3 (docs/PHASE3.md): Chuck Norris autonomous mode + destructive-op + -- heuristic. The block is OFF by default (sane defaults kick in when + -- absent); uncomment to tune. + -- + -- safety = { + -- -- LLM second-opinion on commands the static patterns don't flag. + -- -- Default true. Set false for static-only operation (faster, but + -- -- misses novel destructive patterns the static list doesn't know + -- -- about — bash -c content, custom destructive idioms, etc.). + -- llm_second_opinion = true, + -- + -- -- Which configured model to use for the YES/NO destructive probe. + -- -- Precedence: this field → models.deep → models[default_model]. + -- -- R-B2: prefer an INDEPENDENT model class from the action-emitting + -- -- model (avoids self-policing). 
Recommended values: + -- -- "cloud" — anthropic/claude-haiku-4.5 via openrouter. Fast and + -- -- reliable. Costs money per probe (typical Norris + -- -- session = 16 probes max, often cached). + -- -- "deep" — local large model (qwen3-30b on this fleet). Free + -- -- but slow on RK3588 hardware (~1-3s per probe). + -- -- Falls back here automatically if not set. + -- -- "fast" — same model as the action-emitter. NOT RECOMMENDED + -- -- (circular trust); use only when no other option. + -- llm_model = "cloud", + -- + -- -- Norris planning-loop budget. Iterations of safety.norris_step. + -- -- Each iteration is one broker round-trip + dispatch of actions. + -- -- Default 8. Bump for long-running goals; cap low for testing. + -- max_norris_steps = 8, + -- }, + + -- Phase 4 (docs/PHASE4.md): cross-session memory.jsonl + startup + -- injection + :memory management surface. The block is OFF by + -- default (no startup injection); uncomment to tune. Note that + -- :remember / :memory list / :memory forget / :memory summarize + -- all work without this block — they store to / + -- memory.jsonl regardless. The block only configures the + -- injection-into-system-prompt behavior at startup. + -- + -- memory = { + -- -- Cap on total characters injected at startup. ~2000 chars ≈ + -- -- 500 tokens. LRU-by-ts selection if your memory.jsonl has + -- -- more recent items than fit. Older items remain in the + -- -- file; only injection is bounded. Suppressed entirely in + -- -- Norris mode (R-C1). + -- inject_max_chars = 2000, + -- + -- -- Which configured model to use for :memory summarize. + -- -- Defaults to the active model when nil. Use "fast" for + -- -- speed; "deep" or "cloud" for better extraction quality + -- -- (cloud may have variable cost per session). + -- summarizer_model = "fast", + -- + -- -- #102: auto-summarize the session into memory.jsonl on :q. 
+ -- -- When true, shutdown_session runs the same distill flow as + -- -- `:memory summarize`, non-interactively, and auto-adds the + -- -- parsed candidates. Silent no-op for trivial sessions (turn + -- -- count < min_turns_for_summary, default 5). pcall'd so a + -- -- broker failure never blocks :q. + -- auto_summarize_on_quit = true, + -- min_turns_for_summary = 5, + -- summary_model = "fast", -- new alias; summarizer_model + -- -- above is still honored for + -- -- back-compat. + -- }, + + -- Phase 5 (docs/PHASE5.md): multi-model routing + cloud fallback + + -- summarize-on-evict. OFF by default — auto-routing can spend money + -- silently on the cloud preset; require explicit opt-in. + -- + -- routing = { + -- -- Enable auto-routing per request. When true, router.classify_model + -- -- inspects each prompt and may switch the model for THAT request + -- -- only (the :model selection is preserved across requests). + -- -- Default false. Toggle at runtime with :route on / :route off. + -- auto = true, + -- + -- -- Class → model mapping. nil = "keep current" (heuristic fires + -- -- but no override). Ships with reasoning = nil because mapping + -- -- "explain ..." prompts to a paid cloud model would spend money + -- -- silently — opt in by uncommenting the reasoning line below. + -- classes = { + -- code = "deep", -- code-like prompts to local deep + -- -- reasoning = "cloud", -- OPT-IN: "explain"/"why"/"how does" → paid + -- -- default = nil, -- keep active model + -- }, + -- + -- -- Single-hop retry on transport failure (HTTP 5xx, 408, + -- -- 404 model_not_found, DNS, connection refused, timeouts). + -- -- Retries against fallback_model once. Skipped if any text + -- -- has already streamed (no partial-output duplication). + -- -- Toggle at runtime with :fallback on / :fallback off. + -- fallback = false, -- default off (cost-safety) + -- fallback_model = "cloud", + -- + -- -- Issue #86: per-class system_prompt override. 
When the + -- -- classified request falls into a class with an entry here, + -- -- the BASE system_prompt is REPLACED for that one request + -- -- (dynamic blocks — [background], [project], [earlier + -- -- summary], NORRIS suffix — still compose on top). Mostly + -- -- useful for tightening small local models' instruction + -- -- adherence. Default {} (no override). + -- system_prompts = { + -- code = [[You are a code assistant. Rules: + -- 1. Output ONLY the requested code or command. + -- 2. No prose explanation unless explicitly asked. + -- 3. Wrap shell commands in CMD: prefix. + -- 4. Max response: 200 tokens.]], + -- default = [[You are a shell assistant. + -- Output shell commands as: CMD: + -- Output answers as single short sentences. + -- Do not ask clarifying questions.]], + -- -- reasoning routes to cloud; no override usually needed + -- }, + -- + -- -- Issue #88: per-class GBNF grammar passthrough. llama.cpp + -- -- constrains the sampler to ONLY emit tokens matching the + -- -- grammar — eliminates format drift on small models. Cloud + -- -- (Anthropic/Bedrock) silently ignores the field, so default + -- -- passthrough is safe; no per-model opt-out needed. Misformed + -- -- grammar surfaces as a broker error at request time. + -- grammars = { + -- code = [[root ::= "CMD: " [^\n]+ "\n"]], + -- default = [[root ::= ("CMD: " [^\n]+ "\n") | [^\n]+ "\n"]], + -- }, + -- }, + -- + -- Issue #88 (continued): for the safety LLM probe (YES/NO + -- destructive classification), set safety.probe_grammar to force + -- the probe model to emit exactly YES or NO. Eliminates the + -- regex-match fallback for unparseable verdicts; small models + -- become reliable enough to use as the probe. + -- + -- safety = { + -- llm_second_opinion = true, + -- llm_model = "fast", + -- probe_grammar = [[root ::= ("YES" | "NO")]], + -- }, + + -- ── Issue #87 (route-aware context compression). 
+ -- When a routed model preset has `local_compress = true`, each + -- broker call against THAT preset gets a compressed view of + -- ctx.turns: only the last `keep_turns` turns; any turn whose + -- content exceeds `max_turn_chars` is tail-truncated. The full + -- context lives on (visible via :history); compression is purely + -- per-request for small models that effectively use a fraction + -- of their advertised context window. + -- + -- Set the per-model opt-in on models[<name>]: + -- models.fast = { ..., local_compress = true } + -- Defaults live under context.compress: + -- context = { + -- ... + -- compress = { keep_turns = 2, max_turn_chars = 800 }, + -- } + -- + -- Trade-off documented in the FR: tool turns lose information + -- when tail-truncated. Acceptable for shell-output blocks (the + -- tail is usually the relevant bit); known limitation for + -- structured tool results. Disable per-model if it bites. + + -- ── Issue #89 / Phase 10: cloud preplanner → local executor split. + -- When cfg.norris.preplanner names a model preset, :norris launch + -- fires ONE broker.chat against that preset asking for a sequence + -- of TASK: <description> lines. Parsed list (capped at tasks_max) + -- becomes ctx.norris_tasks; the executor model (cfg.norris.executor, + -- defaulting to the active :model selection) runs each task with + -- the current task shown in the per-step header. + -- + -- Goal: small fast local models are cheap per step but easily + -- distracted on multi-step plans; cloud is capable at planning + -- but expensive per step. Use cloud ONCE for the plan, local for + -- every step. Falls back to single-model Norris (existing + -- behavior) when preplanner unset / fails / produces no TASKs. + -- + -- norris = { + -- preplanner = "cloud", -- model name in cfg.models; + -- -- this preset is called ONCE per + -- -- :norris launch. Omit to run + -- -- single-model (Phase 6 behavior). + -- executor = "fast", -- model that runs each step.
+ -- -- Omit to use the active :model. + -- tasks_max = 16, -- cap on preplan list size. + -- -- preplan_system = "...", -- override the built-in prompt + -- }, + -- + -- :cost detail separates norris-preplan and norris rows so you + -- can see cloud planning cost vs local execution cost. The + -- preplan call does NOT retry via fallback_model (a different + -- model = a different decomposition; clean hard-fail to single- + -- model is safer). + + -- ── Phase 5 context summarization on sliding-window eviction. + -- Set INSIDE the context = { ... } block above to enable: + -- context = { + -- max_turns = 40, + -- token_budget = 4096, + -- summarize_on_evict = true, + -- summarizer_model = "fast", -- model name in models{} + -- max_summary_chars = 2000, + -- + -- -- #101 (proactive periodic summarization). When set, + -- -- enforce_cadence fires every N appends (before + -- -- enforce_budget) and folds turns OLDER than + -- -- summarize_keep_recent into ctx.summary. Goal: keep the + -- -- wire prompt tight from the start so small local models + -- -- aren't fed near-budget context until eviction. Composes + -- -- with summarize_on_evict (same summarize_fn closure; + -- -- different trigger). Suppressed in Norris (R-C4 parity). + -- summarize_every_n_turns = 10, -- nil = disabled (default) + -- summarize_keep_recent = 4, + -- }, + -- When summarize_on_evict is true, evicted turn pairs are fed to + -- summarizer_model and the result lives on ctx.summary, appended to + -- the system prompt as [earlier conversation summary]. Suppressed + -- in Norris mode (R-C4 — planner stays on its goal). If broker + -- fails, falls back to Phase 0 silent eviction (no crash). + + -- Phase 6 (docs/PHASE6.md): project file-tree context + :diff / + -- :tree / :highlight metas. The :diff and :tree metas work without + -- any config. The `project` block below only controls the + -- AUTO-injection-at-startup behavior; manual `:tree` always works + -- regardless. 
Uncomment to enable startup auto-inject. + -- + -- project = { + -- auto_tree = true, -- run `:tree` once at startup + -- tree_depth = 3, -- depth filter for the scan (find fallback only; + -- -- git ls-files emits full repo-relative paths) + -- tree_max_chars = 4096, -- truncate the injected block above this + -- }, + -- + -- :highlight has no config flag in v1 — toggled at runtime only. + -- Requires the external `tree-sitter` CLI plus configured parser- + -- directories with cloned + built `tree-sitter-` grammars + -- (see `:highlight on` for the install hints). + + -- Phase 7 (docs/PHASE7.md): cost / usage observability. broker.lua + -- captures `usage` (+ `cost` for cloud) from every chat/chat_stream + -- call and routes via ctx:add_usage to a per-session accumulator. + -- `:cost` / `:cost detail` / `:cost reset` surface the totals. + -- The `cost` block below configures OPTIONAL warn thresholds — + -- a single status line fires the first time the cumulative + -- crosses each threshold. Default off. Useful when paid cloud + -- presets are in play so runaway-cost sessions get a nudge. + -- + -- cost = { + -- warn_at_dollars = 0.50, -- one-shot warn when cumulative cost crosses + -- warn_at_tokens = 100000, -- one-shot warn when cumulative tokens crosses + -- }, + -- + -- Both flags are independent (R4 — first-to-fire doesn't suppress + -- the other); `:cost reset` re-arms both. Per-turn usage is also + -- written to session/*.jsonl (assistant-turn `usage` field) for + -- after-the-fact scripting; cross-session aggregation deferred + -- to a future phase (Q-C2). + + -- Phase 8 (docs/PHASE8.md): accurate tokenization via the broker's + -- /tokenize endpoint, replacing the Phase 0 §8 char/4 heuristic. + -- Two consequences when use_endpoint=true: + -- (1) Context:estimate_tokens hits /tokenize once per + -- new turn (cached on the turn dict thereafter). Network + -- cost is one round-trip (~30ms) per fresh turn; subsequent + -- calls reuse the cache. 
+ -- (2) Context:enforce_budget actually ENFORCES token_budget now + -- (previously only max_turns was checked). Sessions that + -- fit under char/4 may evict earlier — raise token_budget + -- to match your model's real context window if needed. + -- Cloud endpoints (OpenRouter) don't expose /tokenize; capability + -- cached as unsupported on first probe -> silent char/4 fallback. + -- + -- tokenize = { + -- use_endpoint = true, + -- }, +} diff --git a/main.lua b/main.lua index 403f934..d037c24 100644 --- a/main.lua +++ b/main.lua @@ -2,16 +2,21 @@ -- Phase 0: arg parsing, config load, REPL start. -- See docs/PHASE0.md §4, §10. -p one-shot mode lands per issue #4. --- Make project modules and the vendored dkjson resolvable from the repo root. --- Run aish with the repo root as cwd; PTY-relative resolution lands later. -package.path = "./?.lua;./vendor/?.lua;" .. package.path +-- Resolve modules + vendored dkjson relative to this script's directory, +-- not cwd. Packaged install puts main.lua at /usr/share/lua/5.1/aish/ and +-- the /usr/bin/aish wrapper execs `luajit /usr/share/lua/5.1/aish/main.lua` +-- from whatever cwd the user is in — siblings must still resolve. Dev mode +-- (`luajit main.lua` from repo root) keeps working because arg[0] is then +-- "main.lua" with no "/" — _dir falls back to "./". +local _dir = arg[0]:match("(.*/)") or "./" +package.path = _dir .. "?.lua;" .. _dir .. "vendor/?.lua;" .. package.path local USAGE = [[ aish — AI-augmented conversational shell. Usage: - luajit main.lua [--config ] [--help] -- interactive REPL - luajit main.lua -p "" [--config ] -- one-shot, print + exit + aish [--config ] [--help] -- interactive REPL + aish -p "" [--config ] -- one-shot, print + exit In -p mode, if stdin is not a TTY it's read as additional context and prepended to the prompt as a fenced block — composes with Unix pipes: