-- aish/config.lua

-- config.lua — model registry, routing rules, user preferences.
-- Loaded with dofile() at startup; returns a plain Lua table.
-- See docs/PHASE0.md §10 for resolution order and full schema.
--
-- Per issue #12: hossenfelder is the canonical single-URL broker. It does
-- model-aware routing server-side (local models on boltzmann; cloud routes
-- through OpenRouter using its own bearer auth — no client-side key here).
-- Discovery: GET http://hossenfelder.fritz.box:8082/v1/models.

-- Base URL of the hossenfelder broker. Every model profile below uses it as
-- its endpoint; only the `model` id (and per-profile tuning) differs.
local HOSSENFELDER = "http://hossenfelder.fritz.box:8082"

return {
    -- Key into `models` below.
    default_model = "fast",

    -- Named model profiles. All of them go through the same broker URL.
    models = {
        fast = {
            endpoint    = HOSSENFELDER,
            model       = "qwen2.5-coder-1.5b-q4_k_m.gguf",
            temperature = 0.2,
        },
        deep = {
            endpoint    = HOSSENFELDER,
            model       = "qwen3-30b-a3b-instruct",
            -- 30 min (= 1800000 ms), written as a product so the unit stays
            -- visible next to the number. A 30B model on RK3588 is patient work.
            -- NOTE(review): an earlier comment on this line said "10 min"; if
            -- that was the intent, change this to 10 * 60 * 1000.
            timeout_ms  = 30 * 60 * 1000,
            temperature = 0.1,
        },
        cloud = {
            endpoint    = HOSSENFELDER,
            model       = "anthropic/claude-haiku-4.5",
            temperature = 0.2,
        },
    },

    shell = {
        -- NOTE(review): presumably the words recognised as shell commands
        -- rather than prompts for the model — confirm against the dispatcher.
        known_commands = {
            "ls", "cat", "cd", "grep", "find", "cp", "mv", "rm",
            "mkdir", "rmdir", "git", "make", "cmake", "gcc", "clang",
            "python3", "luajit", "ssh", "scp", "curl", "wget",
        },
        capture_output = true,    -- inject exec output into context
        confirm_cmd    = true,    -- prompt before executing CMD: suggestions
    },

    -- Conversation-context limits.
    -- NOTE(review): exact semantics are not visible in this file; see
    -- docs/PHASE0.md §10 for the schema.
    context = {
        max_turns    = 40,
        token_budget = 4096,
    },

    history = {
        -- Resolves under $HOME; falls back to the current directory
        -- ("./.local/share/aish") when HOME is unset.
        dir = (os.getenv("HOME") or ".") .. "/.local/share/aish",
    },

    -- Phase 2 (docs/PHASE2.md): MCP server registry + tool-call policy.
    -- The block is OFF by default — connect-at-startup happens only when
    -- `servers` is non-empty. Uncomment + adjust per your fleet.
    --
    -- mcp = {
    --     servers = {
    --         -- Each entry: alias = { url = "...", auth_token = "..." | auth_env = "..." }
    --         -- auth_token literal > auth_env env-var indirection > nil (no auth).
    --         -- Aliases become the namespace prefix on tool names sent to the model
    --         -- ("<alias>__<tool>" — e.g. "boltzmann__list_dir"). The separator is
    --         -- "__" (two underscores) because Anthropic via Bedrock validates tool
    --         -- names against ^[a-zA-Z0-9_-]{1,128}$ — dots are rejected.
    --         -- Aliases themselves must not contain "__".
    --         boltzmann = {
    --             url      = "http://boltzmann.fritz.box:8080/mcp",
    --             auth_env = "BOLTZMANN_MCP_TOKEN",
    --         },
    --         hertz = {
    --             url      = "http://hertz.fritz.box:8080/mcp",
    --             auth_env = "HERTZ_MCP_TOKEN",
    --         },
    --         broglie = {
    --             url = "http://broglie.fritz.box:8080/mcp",  -- LAN-only, no auth
    --         },
    --     },
    --
    --     -- Per-call confirm gate auto-approve policy.
    --     -- Key forms:
    --     --   "<alias>__<tool>" — auto-approve one specific tool
    --     --   "<alias>__*"      — auto-approve every tool on that server
    --     -- Anything not matched falls back to the [y/N] prompt.
    --     auto_approve = {
    --         ["boltzmann__read_file"]    = true,
    --         ["boltzmann__list_dir"]     = true,
    --         ["boltzmann__search_files"] = true,
    --         ["hertz__*"]                = true,   -- trust the hub fully
    --     },
    --
    --     -- Tool-call sub-loop budget per ask_ai turn. Hitting the cap surfaces
    --     -- a status and breaks; default 8 if absent.
    --     max_tool_depth = 8,
    -- },

    -- Phase 3 (docs/PHASE3.md): Chuck Norris autonomous mode + destructive-op
    -- heuristic. The block is OFF by default (sane defaults kick in when
    -- absent); uncomment to tune.
    --
    -- safety = {
    --     -- LLM second-opinion on commands the static patterns don't flag.
    --     -- Default true. Set false for static-only operation (faster, but
    --     -- misses novel destructive patterns the static list doesn't know
    --     -- about — bash -c content, custom destructive idioms, etc.).
    --     llm_second_opinion = true,
    --
    --     -- Which configured model to use for the YES/NO destructive probe.
    --     -- Precedence: this field → models.deep → models[default_model].
    --     -- R-B2: prefer an INDEPENDENT model class from the action-emitting
    --     -- model (avoids self-policing). Recommended values:
    --     --   "cloud"  — anthropic/claude-haiku-4.5 via openrouter. Fast and
    --     --              reliable. Costs money per probe (typical Norris
    --     --              session = 16 probes max, often cached).
    --     --   "deep"   — local large model (qwen3-30b on this fleet). Free
    --     --              but slow on RK3588 hardware (~1-3s per probe).
    --     --              Falls back here automatically if not set.
    --     --   "fast"   — same model as the action-emitter. NOT RECOMMENDED
    --     --              (circular trust); use only when no other option.
    --     llm_model = "cloud",
    --
    --     -- Norris planning-loop budget. Iterations of safety.norris_step.
    --     -- Each iteration is one broker round-trip + dispatch of actions.
    --     -- Default 8. Bump for long-running goals; cap low for testing.
    --     max_norris_steps = 8,
    -- },

    -- Phase 4 (docs/PHASE4.md): cross-session memory.jsonl + startup
    -- injection + :memory management surface. The block is OFF by
    -- default (no startup injection); uncomment to tune. Note that
    -- :remember / :memory list / :memory forget / :memory summarize
    -- all work without this block — they store to <history.dir>/
    -- memory.jsonl regardless. The block only configures the
    -- injection-into-system-prompt behavior at startup.
    --
    -- memory = {
    --     -- Cap on total characters injected at startup. ~2000 chars ≈
    --     -- 500 tokens. LRU-by-ts selection if your memory.jsonl has
    --     -- more recent items than fit. Older items remain in the
    --     -- file; only injection is bounded. Suppressed entirely in
    --     -- Norris mode (R-C1).
    --     inject_max_chars = 2000,
    --
    --     -- Which configured model to use for :memory summarize.
    --     -- Defaults to the active model when nil. Use "fast" for
    --     -- speed; "deep" or "cloud" for better extraction quality
    --     -- (cloud may have variable cost per session).
    --     summarizer_model = "fast",
    -- },
}