-- context.lua — in-memory conversation history + token budget.
-- Phase 0: ordered turn list, sliding-window eviction by max_turns.
-- Tokenization is char/4 heuristic in Phase 0; accurate count is Phase 3 (Q1).
-- Phase 2 (added 2026-05-12): support for `role:"tool"` turns and assistant
-- turns carrying `tool_calls = [...]`, plus a `use_tool_role` rendering
-- toggle for the strict-chat-template fallback path (Q18).
-- See docs/PHASE0.md §6, §8 and docs/PHASE2.md §3 / §5.

local M = {}

-- The §6 default system prompt. The `CMD: ` (exact prefix, single space)
-- contract is locked per §3 invariants — do not edit without amending PHASE0.
-- Phase 2 appends ~4 lines about MCP tools per PHASE2.md §8 (hybrid:
-- static frame here + dynamic tools list in the request body). The block
-- is always present even when no MCP servers are configured — the cost
-- is ~60 tokens and the model just sees instructions that don't apply.
--
-- NOTE(review): the whitespace inside this literal is reproduced exactly as
-- found in the file (including the single embedded newline); confirm the
-- intended line breaks against PHASE0.md §6 / PHASE2.md §8.
local DEFAULT_SYSTEM_PROMPT = (
  " You are aish, an AI-augmented shell assistant."
  .. " You help the user execute shell commands, write and debug code, and re-engineer software."
  .. " When suggesting shell commands, output them on a line beginning with exactly \"CMD: \" so aish can identify and optionally execute them."
  .. " Be concise."
  .. " Prefer concrete actions over explanations unless asked."
  .. " You may have access to MCP tools — they appear in this request's `tools` field."
  .. " Call a tool by emitting a tool_call; the result will be supplied in the next turn."
  .. " Use tools for structured operations (file reads, queries, etc.) and `CMD:` lines for local shell commands."
  .. " \nPrefer tools when available; fall back to `CMD:` for anything not exposed as a tool."
)

local Context = {}
Context.__index = Context

-- Build a fresh conversation context.
--
-- opts (optional table) — every field is optional:
--   system_prompt      base system prompt (defaults to DEFAULT_SYSTEM_PROMPT)
--   max_turns          turn-count cap used by enforce_budget (default 40)
--   token_budget       token cap used by enforce_budget (default 4096)
--   use_tool_role      tool-role rendering toggle (default true)
--   summarize_fn       summarize-on-evict callback
--   max_summary_chars  cap on the rolling summary (default 2000)
--   tokenize_fn        tokenizer callback for estimate_tokens
--
-- Returns a table with Context as its metatable.
function M.new(opts)
  opts = opts or {}

  -- Phase 2: tool-role rendering toggle. true = emit OpenAI-standard
  -- role:"tool" messages from to_messages(); false = collapse
  -- assistant+tool_calls and tool turns into a single assistant text
  -- turn for chat templates that reject the role:"tool" shape.
  -- Default true per PHASE2.md §12 "Q18 default"; only a non-nil value
  -- supplied by the caller replaces it.
  local use_tool_role = opts.use_tool_role
  if use_tool_role == nil then
    use_tool_role = true
  end

  local ctx = {
    system_prompt = opts.system_prompt or DEFAULT_SYSTEM_PROMPT,
    turns = {},
    -- Buffered until the next user turn (§6).
    pending_exec_output = nil,
    max_turns = opts.max_turns or 40,
    token_budget = opts.token_budget or 4096,
    use_tool_role = use_tool_role,

    -- Phase 5: summarize-on-evict. When set, enforce_budget calls
    --   summarize_fn(prior_summary, evicted_turns) -> string | nil
    -- and updates ctx.summary instead of silently dropping turns.
    -- Callback contract per PHASE5.md R-B1:
    --   (nil, [turns]) → first-time summarize
    --   (str, [turns]) → additive: extend prior summary with new turns
    --   (str, nil)     → compress: re-summarize the prior summary
    -- Returns nil → fall back to silent eviction (Phase 0 behavior).
    summarize_fn = opts.summarize_fn,
    summary = nil, -- rolling summary string
    max_summary_chars = opts.max_summary_chars or 2000,

    -- Phase 6 (#issue Phase 6 §6): project file-tree block, set by
    -- repl.lua via :tree meta or the cfg.project.auto_tree startup
    -- hook. nil = no block injected. Cached scan opts (depth /
    -- max_chars overrides) live on _project_opts for :tree refresh.
    project = nil,
    _project_opts = nil,

    -- Phase 7 (docs/PHASE7.md): cost/usage accumulator. Keyed as
    -- usage_totals[model_name][category] -> { prompt, completion,
    -- calls, cost, is_local }. is_local (R6) is a sticky flag
    -- set when ANY recorded usage for the slot had cost==nil
    -- (preserves local-vs-cloud-zero distinction for :cost detail
    -- annotation). cost_warn_state (R4) carries per-threshold
    -- one-shot flags so warn_at_dollars firing doesn't suppress
    -- warn_at_tokens. Both survive :reset (R8 parity).
    usage_totals = {},
    cost_warn_state = { dollars = false, tokens = false },

    -- Phase 8 (docs/PHASE8.md): optional tokenize callback. When
    -- set, Context:estimate_tokens uses it (with a per-turn cache
    -- on turn._tokens for amortization). nil = char/4 fallback
    -- (Phase 0 §8 — existing behavior, no change).
    tokenize_fn = opts.tokenize_fn,
  }

  return setmetatable(ctx, Context)
end

-- Append a turn. Phase 2 widens what's valid:
--   role="user"       content (string) required
--   role="system"     content (string) required (callers shouldn't add system
--                     turns directly; system prompt is stored separately and
--                     prepended at to_messages time per §6)
--   role="assistant"  content may be empty IF tool_calls is non-empty;
--                     otherwise content required
--   role="tool"       tool_call_id required + content required; the preceding
--                     stored turn must be an assistant turn with non-empty
--                     tool_calls (debug assertion catches sub-loop bugs early
--                     per PHASE2.md §3 row + N4 in review)
--
-- Only role, content, tool_calls and tool_call_id are copied into the stored
-- turn. Raises (via assert) when a requirement above is not met.
function Context:append(turn)
  assert(type(turn) == "table" and turn.role,
    "context:append requires { role = ... }")

  local role = turn.role
  local entry = { role = role, content = turn.content or "" }

  if role == "assistant" and turn.tool_calls and #turn.tool_calls > 0 then
    entry.tool_calls = turn.tool_calls
  elseif role == "tool" then
    assert(turn.tool_call_id, "context:append role=tool requires tool_call_id")
    assert(turn.content, "context:append role=tool requires content")

    -- A tool turn may follow either an assistant-with-tool_calls (the
    -- first reply in the sub-loop) or another tool turn (subsequent
    -- replies when the assistant emitted multiple parallel tool_calls).
    -- Scan backwards for the nearest non-tool turn; that turn must be
    -- an assistant with non-empty tool_calls.
    local anchor
    for idx = #self.turns, 1, -1 do
      local candidate = self.turns[idx]
      if candidate.role ~= "tool" then
        anchor = candidate
        break
      end
    end

    assert(anchor and anchor.role == "assistant"
        and anchor.tool_calls and #anchor.tool_calls > 0,
      "context:append role=tool must follow assistant with tool_calls "
        .. "(possibly via prior tool turns in the same sub-loop)")

    entry.tool_call_id = turn.tool_call_id
  else
    assert(turn.content, "context:append requires content for role=" .. turn.role)
  end

  self.turns[#self.turns + 1] = entry
end

-- Buffer captured shell-exec output. Per §6 (post user-test fix), exec output
-- is NOT appended as its own user turn — strict chat templates (e.g. mistral-
-- nemo's Jinja) reject the resulting user/user back-to-back. Instead it is
-- held until the next user turn arrives, then prepended via :append_user.
-- nil / false / "" input is ignored; repeated calls accumulate, each block
-- separated by a newline.
function Context:append_exec_output(out)
  if out and out ~= "" then
    local block = "[exec output]\n" .. out
    local held = self.pending_exec_output
    if held then
      block = held .. "\n" .. block
    end
    self.pending_exec_output = block
  end
end

-- Append a user turn, flushing any pending exec output as a prefix. Use this
-- (rather than raw :append) for any turn whose role is "user".
function Context:append_user(content)
  local held = self.pending_exec_output
  local text = content
  if held then
    -- Build the combined text first; the buffer is only cleared once the
    -- concatenation has succeeded.
    text = held .. "\n\n" .. content
    self.pending_exec_output = nil
  end
  self:append({ role = "user", content = text })
end

-- Compact JSON-ish rendering used by the fallback (use_tool_role=false) path
-- to convert a tool_calls + tool-result pair into inline text. Not OpenAI-
-- standard — only used when a strict chat template rejects role:"tool".
-- call           table with optional `name` and `arguments` fields
-- result_content tool result text (nil renders as "")
-- Returns "[tool: <name>]\n<arguments>\n[result]\n<result_content>"; a missing
-- name renders as "?", and arguments / result go through tostring().
local function inline_tool_call(call, result_content)
  local name = call.name or "?"
  local arguments = tostring(call.arguments or "")
  local result = tostring(result_content or "")
  return ("[tool: %s]\n%s\n[result]\n%s"):format(name, arguments, result)
end

-- Render the messages array for broker.chat (system prompt prepended; turns
-- in order). Phase 2 adds two emission modes:
--
--   use_tool_role = true (default): pass through OpenAI-standard
--     {role:"assistant", content, tool_calls} and {role:"tool", tool_call_id,
--     content} turns unchanged.
--
--   use_tool_role = false (fallback, Q18): collapse each
--     assistant-with-tool_calls + its following role:"tool" turn(s) into a
--     single assistant text turn carrying the synthesized
--     "[tool: <name>]\n<arguments>\n[result]\n<result content>" body. The
--     role:"tool" turns and the tool_calls field are NOT emitted. Same logical
--     alternation seen by the model (user → assistant → user → assistant), no
--     strict-template breakage.
--
-- The system prompt is NOT stored in self.turns per §6.

-- Phase 4: [background] block composer. Memory items from memory.jsonl
-- are stored on self.memory_items (loaded by repl.lua at startup) and
-- rendered as a dim-styled suffix on the system prompt. Suppressed when
-- norris_active to avoid stacking large background contexts in
-- per-iteration broker calls (R-C1 review fold-in). Cap honored via
-- inject_max_chars argument from the caller (already truncated by repl).
--
-- items: sequence of tables with optional `kind` and `content` fields.
-- Returns "" for a nil or empty list; otherwise two leading blank lines, a
-- header line, and one "- (<kind>) <content>" bullet per item, with newlines
-- inside the content flattened to spaces.
local function compose_background(items)
  if not items or #items == 0 then
    return ""
  end

  local lines = { "", "", "[background] (memory.jsonl; manage via :memory)" }
  for _, item in ipairs(items) do
    -- tostring() keeps a non-string content value (e.g. a number) from
    -- raising on the method call. The outer parentheses keep only gsub's
    -- first result: unparenthesized, its replacement count would be
    -- forwarded to format as a stray trailing argument.
    local flattened = (tostring(item.content or ""):gsub("\n", " "))
    lines[#lines + 1] = ("- (%s) %s"):format(item.kind or "?", flattened)
  end
  return table.concat(lines, "\n")
end

-- Phase 5 R-C4: summary block composer. Mirrors the [background]
-- pattern; suppressed under Norris (callers already guard, but the
-- function returns "" for empty input regardless).
local function compose_summary(summary_text)
  if not summary_text or summary_text == "" then
    return ""
  end
  return "\n\n[earlier conversation summary]\n" .. summary_text
end

-- Phase 6: project file-tree composer. Inserted between [background]
-- and [earlier summary] so the reading order is memory facts →
-- project tree → earlier conversation → NORRIS suffix. Same Norris-
-- suppression rule (callers gate via self.norris_active).
-- Returns "" for nil / empty input.
local function compose_project(project_text)
  if not project_text or project_text == "" then
    return ""
  end
  return "\n\n[project]\n" .. project_text
end

-- Phase 3: NORRIS MODE suffix appended to the system prompt when
-- self.norris_active. Carries self.norris_goal so eviction of the
-- user's "[norris] goal: ..." turn doesn't lose the anchor.
-- The single %s is filled with the goal via string.format.
--
-- NOTE(review): the whitespace inside this literal is reproduced exactly as
-- found in the file; confirm the intended line breaks against the Phase 3
-- design notes.
local NORRIS_SUFFIX_TEMPLATE = (
  " [NORRIS MODE]"
  .. " You are operating autonomously toward the following goal: %s"
  .. " Plan and execute step by step using CMD: lines (for shell) or tool_calls (when MCP tools are available)."
  .. " After each action, you will see its result in the next turn."
  .. " Re-plan based on what you observe."
  .. " When the goal is achieved, emit a single line: GOAL: complete on its own line, optionally followed by a brief summary."
  .. " If the goal is unreachable or you need user input, emit: GOAL: blocked with a one-line reason."
  .. " Avoid destructive operations unless the goal explicitly requires them."
  .. " The user will be prompted to confirm destructive actions; expect their verdict in the next turn as a synthesized \"[aish] ... skipped by user\" message if they declined."
)

-- Render the message list: one system message followed by the stored turns.
--
-- opts (optional table):
--   system_prompt_override  replaces the base system prompt for this render
--                           only; stored state is not modified.
--
-- Returns a new array of message tables. With use_tool_role set, assistant
-- tool_calls are wrapped in the OpenAI shape and tool turns carry their
-- tool_call_id. Otherwise each assistant-with-tool_calls turn and the tool
-- turns directly after it are folded into one assistant text message.
function Context:to_messages(opts)
  -- Phase 10 (#86): per-call system_prompt_override. Replaces the
  -- BASE system_prompt for THIS render only (state unchanged); the
  -- dynamic blocks ([background], [project], [earlier summary],
  -- NORRIS suffix) still compose on top. Used by ask_ai's routing
  -- path when cfg.routing.system_prompts[class] is set — gives
  -- small local models tighter instructions while preserving
  -- ambient memory/project context.
  local sys_content = (opts and opts.system_prompt_override) or self.system_prompt

  -- Phase 4 [background] memory block + Phase 6 [project] file-tree
  -- block + Phase 5 [earlier summary] block. All suppressed during
  -- Norris (R-C1 / R-C4 — avoid redundant tokens per planning
  -- iteration; planner stays focused on its goal anchor).
  if not self.norris_active then
    sys_content = sys_content
      .. compose_background(self.memory_items)
      .. compose_project(self.project)
      .. compose_summary(self.summary)
  end

  -- Phase 3 NORRIS MODE suffix. Last block so its instructions dominate.
  if self.norris_active and self.norris_goal then
    sys_content = sys_content .. string.format(NORRIS_SUFFIX_TEMPLATE, self.norris_goal)
  end

  local msgs = { { role = "system", content = sys_content } }
  local turns = self.turns

  if self.use_tool_role then
    for _, t in ipairs(turns) do
      local m = { role = t.role, content = t.content }
      if t.role == "assistant" and t.tool_calls then
        -- OpenAI shape wraps each call as
        -- {id, type:"function", function:{name, arguments}}.
        local oai = {}
        for i, c in ipairs(t.tool_calls) do
          oai[i] = {
            id = c.id,
            type = "function",
            ["function"] = { name = c.name, arguments = c.arguments or "" },
          }
        end
        m.tool_calls = oai
      elseif t.role == "tool" then
        m.tool_call_id = t.tool_call_id
      end
      msgs[#msgs + 1] = m
    end
    return msgs
  end

  -- Fallback path: walk turns, collapse asst-with-tool_calls + following
  -- tool turns into a single asst text turn. Merge consecutive assistant
  -- turns afterward so the trailing post-tool-result assistant text
  -- doesn't produce asst/asst back-to-back (which strict templates would
  -- also reject — same gotcha PHASE0.md §6 warned about for user/user).
  local function push_or_merge_assistant(content)
    local last = msgs[#msgs]
    if last and last.role == "assistant" then
      last.content = last.content .. "\n" .. content
    else
      msgs[#msgs + 1] = { role = "assistant", content = content }
    end
  end

  local i, n = 1, #turns
  while i <= n do
    local t = turns[i]
    if t.role == "assistant" and t.tool_calls then
      -- Gather the run of tool turns that directly follows this
      -- assistant turn. Only these turns are consumed, so when fewer
      -- tool replies than tool_calls were recorded the user/assistant
      -- turns after the run are still rendered instead of being skipped.
      local run, by_id = {}, {}
      local j = i + 1
      while j <= n and turns[j].role == "tool" do
        local reply = turns[j]
        run[#run + 1] = reply
        if reply.tool_call_id ~= nil and by_id[reply.tool_call_id] == nil then
          by_id[reply.tool_call_id] = reply
        end
        j = j + 1
      end

      local parts = {}
      if t.content and t.content ~= "" then
        parts[#parts + 1] = t.content
      end
      for ci, call in ipairs(t.tool_calls) do
        -- Prefer the reply in the matching position; if its id does not
        -- match (replies recorded out of order), look the id up within
        -- the same run. A call with no reply renders an empty result.
        local reply = run[ci]
        if not (reply and reply.tool_call_id == call.id) then
          reply = (call.id ~= nil) and by_id[call.id] or nil
        end
        parts[#parts + 1] = inline_tool_call(call, reply and reply.content or "")
      end

      push_or_merge_assistant(table.concat(parts, "\n"))
      i = j
    elseif t.role == "tool" then
      -- Orphan tool turn (no preceding asst-tool_calls captured it).
      -- Defensively drop it rather than emit a malformed message.
      i = i + 1
    elseif t.role == "assistant" then
      push_or_merge_assistant(t.content or "")
      i = i + 1
    else
      msgs[#msgs + 1] = { role = t.role, content = t.content }
      i = i + 1
    end
  end

  return msgs
end

-- Evict the oldest pair (user + assistant) while we exceed max_turns
-- OR token_budget (Phase 8 pillar 5). Returns total turns evicted.
-- Caller is responsible for rendering the §8 status line.
--
-- R2 guard: when system_prompt alone exceeds token_budget, the OR
-- condition stays true even when turns are empty — would spin
-- forever calling table.remove on a 0-length list. The `and
-- #self.turns > 0` clause ensures we exit when there's nothing
-- left to evict. Over-budget system_prompts (large [project]
-- blocks, etc.) are then on the user to shrink via :tree off /
-- :memory clear / etc.
-- Each pass evicts the two oldest turns (one, if only one is left) plus any
-- role="tool" turns that would otherwise become the new head of the history:
-- their anchoring assistant-with-tool_calls turn has just been removed, and a
-- tool turn without its anchor is exactly the state Context:append forbids.
-- The whole evicted slice is offered to summarize_fn before removal.
-- Returns the total number of turns removed.
function Context:enforce_budget()
  local evicted = 0

  -- The emptiness check comes first so an over-budget system prompt with no
  -- turns left exits immediately instead of spinning.
  while #self.turns > 0
    and (#self.turns > self.max_turns
      or self:estimate_tokens() > self.token_budget) do
    local turns = self.turns

    -- Size of the slice: the oldest pair, extended over trailing tool turns.
    local take = math.min(2, #turns)
    while turns[take + 1] and turns[take + 1].role == "tool" do
      take = take + 1
    end

    local slice = {}
    for k = 1, take do
      slice[k] = turns[k]
    end

    -- Phase 5: ask the summarize callback (if wired) to absorb this
    -- slice into the rolling summary. Callback contract per R-B1:
    --   summarize_fn(prior_summary, evicted_turns) -> string | nil
    -- nil return (or a raised error) → silent eviction (Phase 0 behavior).
    if self.summarize_fn then
      local ok, new_summary = pcall(self.summarize_fn, self.summary, slice)
      if ok and type(new_summary) == "string" and new_summary ~= "" then
        self.summary = new_summary
        -- R-C1: if grown past cap, compress in a second pass.
        if #self.summary > self.max_summary_chars then
          local ok2, compressed = pcall(self.summarize_fn, self.summary, nil)
          if ok2 and type(compressed) == "string" and compressed ~= "" then
            self.summary = compressed
          end
        end
      end
    end

    -- Drop the slice from the front of the history.
    for _ = 1, take do
      table.remove(turns, 1)
    end
    evicted = evicted + take
  end

  return evicted
end

-- Phase 0 §8: char/4 heuristic. Phase 8 (Q1 resolved): when
-- self.tokenize_fn is set, use it for accuracy. Per-turn _tokens
-- cache amortizes after the first count.
--
-- The system prompt is counted afresh on every call and never cached
-- (stated rationale: memory/project/summary blocks are dynamic) — one
-- tokenize round-trip per call when tokenize_fn is active.
-- NOTE(review): estimate_tokens reads self.system_prompt only; confirm
-- whether the dynamic blocks are meant to be included in the count.
--
-- Turn content is immutable after append (see Context:append; we
-- never mutate stored turns). The cache on t._tokens is therefore
-- safe to live forever on the turn; it dies with the turn on :reset.
-- Count `text` with `tokenize`. Returns (count, exact): `exact` is true when
-- the tokenizer produced a number, false when the char/4 heuristic was
-- substituted because it returned anything else (e.g. nil on failure).
local function count_text_tokens(tokenize, text)
  local n = tokenize(text)
  if type(n) == "number" then
    return n, true
  end
  return math.floor(#text / 4), false
end

-- Estimate the token footprint of self.system_prompt plus every stored turn.
--
-- With self.tokenize_fn set, each text is counted by the callback and the
-- per-turn result is cached on turn._tokens. A non-numeric return from the
-- callback falls back to char/4 for that text and is NOT cached, so a later
-- successful call can still fill the cache. Errors raised by the callback
-- propagate to the caller.
--
-- Without tokenize_fn: total characters / 4, rounded down.
--
-- NOTE(review): only self.system_prompt is counted; the [background] /
-- [project] / [summary] blocks that to_messages appends are not part of this
-- estimate — confirm whether that is intended.
function Context:estimate_tokens()
  local tokenize = self.tokenize_fn

  if tokenize then
    -- Extra parentheses: keep only the count, drop the `exact` flag.
    local total = (count_text_tokens(tokenize, self.system_prompt))
    for _, t in ipairs(self.turns) do
      local cached = t._tokens
      if cached == nil then
        local n, exact = count_text_tokens(tokenize, t.content)
        if exact then
          t._tokens = n
        end
        cached = n
      end
      total = total + cached
    end
    return total
  end

  -- char/4 fallback (Phase 0 behavior, unchanged when tokenize_fn nil)
  local chars = #self.system_prompt
  for _, t in ipairs(self.turns) do
    chars = chars + #t.content
  end
  return math.floor(chars / 4)
end

-- Phase 7: cost/usage accumulator helpers.
--
-- Context:add_usage(model_name, category, usage)
--   Increment the (model, category) slot. usage is the payload from
--   broker.lua's on_delta("usage", ...): { prompt_tokens, completion_
--   tokens, total_tokens, cost (nil for local per R6), model, category }.
--   We use the model_name + category args (not the payload fields)
--   because the caller may want to normalize (e.g., key by req_cfg
--   alias rather than model_cfg.model).
--   model_name defaults to "?" and category to "main" when omitted.
function Context:add_usage(model_name, category, usage)
  model_name = model_name or "?"
  category = category or "main"

  self.usage_totals = self.usage_totals or {}
  local by_category = self.usage_totals[model_name] or {}

  local slot = by_category[category]
  if not slot then
    slot = {
      prompt = 0,
      completion = 0,
      calls = 0,
      cost = 0,
      -- R6: sticky flag; set once any nil-cost usage lands here.
      is_local = false,
    }
  end

  slot.prompt = slot.prompt + (usage.prompt_tokens or 0)
  slot.completion = slot.completion + (usage.completion_tokens or 0)
  slot.calls = slot.calls + 1

  if usage.cost ~= nil then
    slot.cost = slot.cost + usage.cost
  else
    slot.is_local = true -- preserves local-vs-cloud-zero per R6
  end

  by_category[category] = slot
  self.usage_totals[model_name] = by_category
end

-- Sum of the recorded cost across every (model, category) slot; slots with no
-- cost field count as 0.
function Context:total_cost()
  local sum = 0
  local totals = self.usage_totals
  if totals then
    for _, by_category in pairs(totals) do
      for _, slot in pairs(by_category) do
        sum = sum + (slot.cost or 0)
      end
    end
  end
  return sum
end

-- Returns (prompt_tokens, completion_tokens) summed across all slots.
function Context:total_tokens()
  local prompt_sum, completion_sum = 0, 0
  local totals = self.usage_totals
  if totals then
    for _, by_category in pairs(totals) do
      for _, slot in pairs(by_category) do
        prompt_sum = prompt_sum + (slot.prompt or 0)
        completion_sum = completion_sum + (slot.completion or 0)
      end
    end
  end
  return prompt_sum, completion_sum
end

-- :cost reset path — zero accumulator AND clear per-threshold one-shot flags.
-- Both tables are replaced with fresh ones rather than cleared in place.
function Context:reset_usage()
  self.cost_warn_state = { dollars = false, tokens = false }
  self.usage_totals = {}
end

-- Conversation reset: drops the stored turns, any buffered exec output, and
-- the rolling summary.
--
-- R8 parity: usage_totals + cost_warn_state preserved (matches
-- memory_items + project — "ambient context survives a user-
-- driven conversation reset"). Use :reset_usage to zero the
-- cost meter explicitly.
function Context:reset()
  self.summary = nil
  self.pending_exec_output = nil
  self.turns = {}
end

return M