-- context.lua — in-memory conversation history + token budget.
-- Phase 0: ordered turn list, sliding-window eviction by max_turns.
-- Tokenization is char/4 heuristic in Phase 0; accurate count is Phase 3 (Q1).
-- Phase 2 (added 2026-05-12): support for `role:"tool"` turns and assistant
-- turns carrying `tool_calls = [...]`, plus a `use_tool_role` rendering
-- toggle for the strict-chat-template fallback path (Q18).
-- See docs/PHASE0.md §6, §8 and docs/PHASE2.md §3 / §5.

local M = {}

-- The §6 default system prompt. The `CMD: ` (exact prefix, single space)
-- contract is locked per §3 invariants — do not edit without amending PHASE0.
-- Phase 2 appends ~4 lines about MCP tools per PHASE2.md §8 (hybrid:
-- static frame here + dynamic tools list in the request body). The block
-- is always present even when no MCP servers are configured — the cost
-- is ~60 tokens and the model just sees instructions that don't apply.
--
-- NOTE(review): the literal below is kept byte-for-byte as it appears in this
-- copy of the file. Its internal line breaks look like they were collapsed
-- into spaces at some point — confirm against version control before
-- re-wrapping, because every byte of it is sent to the model.
local DEFAULT_SYSTEM_PROMPT = [[ You are aish, an AI-augmented shell assistant. You help the user execute shell commands, write and debug code, and re-engineer software. When suggesting shell commands, output them on a line beginning with exactly "CMD: " so aish can identify and optionally execute them. Be concise. Prefer concrete actions over explanations unless asked. You may have access to MCP tools — they appear in this request's `tools` field. Call a tool by emitting a tool_call; the result will be supplied in the next turn. Use tools for structured operations (file reads, queries, etc.) and `CMD:` lines for local shell commands. 
Prefer tools when available; fall back to `CMD:` for anything not exposed as a tool.]]

local Context = {}
Context.__index = Context

-- Build a new conversation context.
--
-- opts (table, optional) — recognised fields, all optional:
--   system_prompt      replaces DEFAULT_SYSTEM_PROMPT
--   max_turns          stored turn count that enforce_budget trims to (default 40)
--   token_budget       stored on the context (default 4096)
--   use_tool_role      tool-role rendering toggle (default true, see below)
--   summarize_fn       summarize-on-evict callback (see below)
--   max_summary_chars  size at which the summary is compressed (default 2000)
--
-- Returns a table whose metatable is Context.
function M.new(opts)
  opts = opts or {}

  -- Phase 2: tool-role rendering toggle. true = emit OpenAI-standard
  -- role:"tool" messages from to_messages(); false = collapse
  -- assistant+tool_calls and tool turns into a single assistant text
  -- turn for chat templates that reject the role:"tool" shape.
  -- Default true per PHASE2.md §12 "Q18 default"; flip from caller.
  -- Only a nil option selects the default, so an explicit false survives.
  local tool_role = opts.use_tool_role
  if tool_role == nil then
    tool_role = true
  end

  local ctx = {}
  ctx.system_prompt = opts.system_prompt or DEFAULT_SYSTEM_PROMPT
  ctx.turns = {}
  ctx.pending_exec_output = nil -- buffered until next user turn (§6)
  ctx.max_turns = opts.max_turns or 40
  ctx.token_budget = opts.token_budget or 4096
  ctx.use_tool_role = tool_role

  -- Phase 5: summarize-on-evict. When set, enforce_budget calls
  --   summarize_fn(prior_summary, evicted_turns) -> string | nil
  -- and updates ctx.summary instead of silently dropping turns.
  -- Callback contract per PHASE5.md R-B1:
  --   (nil, [turns])  → first-time summarize
  --   (str, [turns])  → additive: extend prior summary with new turns
  --   (str, nil)      → compress: re-summarize the prior summary
  -- Returns nil → fall back to silent eviction (Phase 0 behavior).
  ctx.summarize_fn = opts.summarize_fn
  ctx.summary = nil -- rolling summary string
  ctx.max_summary_chars = opts.max_summary_chars or 2000

  return setmetatable(ctx, Context)
end

-- Append a turn.
-- Phase 2 widens what's valid for an appended turn:
--   role="user"       content (string) required
--   role="system"     content (string) required (callers shouldn't add system
--                     turns directly; system prompt is stored separately and
--                     prepended at to_messages time per §6)
--   role="assistant"  content may be empty IF tool_calls is non-empty;
--                     otherwise content required
--   role="tool"       tool_call_id required + content required; the preceding
--                     stored turn must be an assistant turn with non-empty
--                     tool_calls (debug assertion catches sub-loop bugs early
--                     per PHASE2.md §3 row + N4 in review)
--
-- turn: table with at least `role`; `content`, `tool_calls` and
--       `tool_call_id` are read depending on the role (see table above).
-- Raises (via assert) when the turn violates the rules above.
-- Only role, content, tool_calls and tool_call_id are copied into the stored
-- turn; the tool_calls table is stored by reference, not cloned.
function Context:append(turn)
  assert(type(turn) == "table" and turn.role,
    "context:append requires { role = ... }")

  local role = turn.role
  local calls = turn.tool_calls
  local entry = { role = role, content = turn.content or "" }

  if role == "assistant" and calls and #calls > 0 then
    -- Content is optional here; it already defaulted to "" above.
    entry.tool_calls = calls
  elseif role ~= "tool" then
    assert(turn.content,
      "context:append requires content for role=" .. turn.role)
  else
    assert(turn.tool_call_id, "context:append role=tool requires tool_call_id")
    assert(turn.content, "context:append role=tool requires content")

    -- A tool turn may follow either an assistant-with-tool_calls (the
    -- first reply in the sub-loop) or another tool turn (subsequent
    -- replies when the assistant emitted multiple parallel tool_calls).
    -- Scan backwards past tool turns; the first non-tool turn found is the
    -- anchor and must be an assistant with non-empty tool_calls. An empty
    -- history, or one made only of tool turns, leaves the anchor nil.
    local anchor
    for idx = #self.turns, 1, -1 do
      local candidate = self.turns[idx]
      if candidate.role ~= "tool" then
        anchor = candidate
        break
      end
    end

    assert(anchor
        and anchor.role == "assistant"
        and anchor.tool_calls
        and #anchor.tool_calls > 0,
      "context:append role=tool must follow assistant with tool_calls "
        .. "(possibly via prior tool turns in the same sub-loop)")

    entry.tool_call_id = turn.tool_call_id
  end

  self.turns[#self.turns + 1] = entry
end

-- Buffer captured shell-exec output.
-- Per §6 (post user-test fix), exec output is NOT appended as its own user
-- turn — strict chat templates (e.g. mistral-nemo's Jinja) reject the
-- resulting user/user back-to-back. Instead it is held until the next user
-- turn arrives, then prepended via :append_user.
--
-- out: captured output string; nil/false/"" is ignored.
-- Successive calls accumulate, each block introduced by an "[exec output]"
-- header line and separated from the previous block by a newline.
function Context:append_exec_output(out)
  if out and out ~= "" then
    local block = "[exec output]\n" .. out
    local held = self.pending_exec_output
    if held then
      block = held .. "\n" .. block
    end
    self.pending_exec_output = block
  end
end

-- Append a user turn, flushing any pending exec output as a prefix. Use this
-- (rather than raw :append) for any turn whose role is "user".
--
-- content: the user's text. When exec output is buffered it is placed in
-- front of the text, separated by a blank line, and the buffer is cleared.
function Context:append_user(content)
  local held = self.pending_exec_output
  if held then
    -- Concatenate first, clear second: if the concatenation raises, the
    -- buffered output is still there.
    content = held .. "\n\n" .. content
    self.pending_exec_output = nil
  end
  self:append({ role = "user", content = content })
end

-- Compact JSON-ish rendering used by the fallback (use_tool_role=false) path
-- to convert a tool_calls + tool-result pair into inline text. Not OpenAI-
-- standard — only used when a strict chat template rejects role:"tool".
--
-- call:           table; `name` (falls back to "?") and `arguments` are read.
-- result_content: tool result; nil/false renders as an empty string.
-- Arguments and result are passed through tostring(), so non-string values
-- appear in whatever form tostring gives them.
local function inline_tool_call(call, result_content)
  local name = call.name or "?"
  local args = tostring(call.arguments or "")
  local result = tostring(result_content or "")
  return string.format("[tool: %s]\n%s\n[result]\n%s", name, args, result)
end

-- Render the messages array for broker.chat (system prompt prepended; turns
-- in order). Phase 2 adds two emission modes:
--
--   use_tool_role = true (default): pass through OpenAI-standard
--     {role:"assistant", content, tool_calls} and {role:"tool", tool_call_id,
--     content} turns unchanged.
--
--   use_tool_role = false (fallback, Q18): collapse each
--     assistant-with-tool_calls + its following role:"tool" turn(s) into a
--     single assistant text turn carrying the synthesized
--     "[tool: <name>]\n<arguments>\n[result]\n<result content>" body (one
--     such section per tool call). The role:"tool" turns and the
--     tool_calls field are NOT emitted.
-- With the use_tool_role = false fallback rendering, the model sees the same
-- logical alternation (user → assistant → user → assistant), so there is no
-- strict-template breakage.
--
-- The system prompt is NOT stored in self.turns per §6.

-- Phase 4: [background] block composer. Memory items from memory.jsonl
-- are stored on self.memory_items (loaded by repl.lua at startup) and
-- rendered as a dim-styled suffix on the system prompt. Suppressed when
-- norris_active to avoid stacking large background contexts in
-- per-iteration broker calls (R-C1 review fold-in). Cap honored via
-- inject_max_chars argument from the caller (already truncated by repl).
--
-- items: sequence of tables; `kind` (falls back to "?") and `content` are
--        read from each.
-- Returns "" for nil/empty input, otherwise two leading newlines, a header
-- line, and one "- (kind) content" line per item, with newlines inside
-- content flattened to spaces.
local function compose_background(items)
  if not items or #items == 0 then
    return ""
  end
  local lines = { "", "", "[background] (memory.jsonl; manage via :memory)" }
  for _, it in ipairs(items) do
    -- The outer parentheses keep only gsub's first result (the string);
    -- without them the replacement count leaks into format() as a stray
    -- extra argument.
    local flat = ((it.content or ""):gsub("\n", " "))
    lines[#lines + 1] = ("- (%s) %s"):format(it.kind or "?", flat)
  end
  return table.concat(lines, "\n")
end

-- Phase 5 R-C4: summary block composer. Mirrors the [background]
-- pattern; suppressed under Norris (callers already guard, but the
-- function returns "" for empty input regardless).
local function compose_summary(summary_text)
  if not summary_text or summary_text == "" then
    return ""
  end
  return "\n\n[earlier conversation summary]\n" .. summary_text
end

-- Phase 3: NORRIS MODE suffix appended to the system prompt when
-- self.norris_active. Carries self.norris_goal so eviction of the
-- user's "[norris] goal: ..." turn doesn't lose the anchor.
--
-- NOTE(review): the literal below is kept byte-for-byte as it appears in this
-- copy of the file. Its internal line breaks look like they were collapsed
-- into spaces — confirm against version control before re-wrapping. The single
-- %s is filled with the goal by string.format in to_messages.
local NORRIS_SUFFIX_TEMPLATE = [[ [NORRIS MODE] You are operating autonomously toward the following goal: %s Plan and execute step by step using CMD: lines (for shell) or tool_calls (when MCP tools are available). After each action, you will see its result in the next turn. Re-plan based on what you observe. When the goal is achieved, emit a single line: GOAL: complete on its own line, optionally followed by a brief summary. 
If the goal is unreachable or you need user input, emit: GOAL: blocked with a one-line reason. Avoid destructive operations unless the goal explicitly requires them. The user will be prompted to confirm destructive actions; expect their verdict in the next turn as a synthesized "[aish] ... skipped by user" message if they declined.]]

-- Render the messages array: one system message followed by the stored turns.
--
-- System message = system prompt, plus (outside Norris mode) the [background]
-- and [earlier conversation summary] blocks, plus (in Norris mode with a goal
-- set) the NORRIS MODE suffix.
--
-- use_tool_role truthy: turns are emitted in OpenAI shape — assistant
--   tool_calls are wrapped as {id, type="function", function={name,
--   arguments}} and tool turns carry tool_call_id. A tool turn that is not
--   anchored to a preceding assistant-with-tool_calls is dropped, matching
--   what the fallback path does with orphans.
-- use_tool_role falsy: each assistant-with-tool_calls and the tool turns that
--   directly follow it are collapsed into one assistant text message;
--   adjacent assistant messages are merged with "\n".
--
-- Returns a new sequence of message tables; self.turns is not modified.
function Context:to_messages()
  local sys_content = self.system_prompt

  -- Phase 4 [background] memory block + Phase 5 [earlier summary]
  -- block. Both suppressed during Norris (R-C1 / R-C4 — avoid
  -- redundant tokens per planning iteration; planner stays focused
  -- on its goal anchor).
  if not self.norris_active then
    sys_content = sys_content .. compose_background(self.memory_items)
    sys_content = sys_content .. compose_summary(self.summary)
  end

  -- Phase 3 NORRIS MODE suffix. Last block so its instructions dominate.
  if self.norris_active and self.norris_goal then
    sys_content = sys_content
      .. string.format(NORRIS_SUFFIX_TEMPLATE, self.norris_goal)
  end

  local msgs = { { role = "system", content = sys_content } }
  local turns = self.turns

  if self.use_tool_role then
    -- True while the most recently seen non-tool turn is an assistant turn
    -- carrying tool_calls, i.e. while a tool turn has a valid anchor.
    local calls_open = false
    for _, t in ipairs(turns) do
      local orphan_tool = (t.role == "tool") and not calls_open
      -- An orphan tool turn (for instance one whose anchor is no longer in
      -- history) is dropped rather than emitted as a malformed message.
      if not orphan_tool then
        local m = { role = t.role, content = t.content }
        if t.role == "assistant" and t.tool_calls then
          -- OpenAI shape wraps each call as
          -- {id, type:"function", function:{name, arguments}}.
          local oai = {}
          for i, c in ipairs(t.tool_calls) do
            oai[i] = {
              id = c.id,
              type = "function",
              ["function"] = { name = c.name, arguments = c.arguments or "" },
            }
          end
          m.tool_calls = oai
          calls_open = true
        elseif t.role == "tool" then
          m.tool_call_id = t.tool_call_id
        else
          calls_open = false
        end
        msgs[#msgs + 1] = m
      end
    end
    return msgs
  end

  -- Fallback path: walk turns, collapse asst-with-tool_calls + following
  -- tool turns into a single asst text turn. Merge consecutive assistant
  -- turns afterward so the trailing post-tool-result assistant text
  -- doesn't produce asst/asst back-to-back (which strict templates would
  -- also reject — same gotcha PHASE0.md §6 warned about for user/user).
  local function push_or_merge_assistant(content)
    local last = msgs[#msgs]
    if last and last.role == "assistant" then
      last.content = last.content .. "\n" .. content
    else
      msgs[#msgs + 1] = { role = "assistant", content = content }
    end
  end

  local total = #turns
  local i = 1
  while i <= total do
    local t = turns[i]
    if t.role == "assistant" and t.tool_calls then
      -- Index the run of tool turns that directly follows, keyed by
      -- tool_call_id. Matching by id (not by position) keeps results that
      -- arrived out of order; stopping at the first non-tool turn means a
      -- missing result can never make us skip over a user/assistant turn.
      local results = {}
      local j = i + 1
      while j <= total and turns[j].role == "tool" do
        local id = turns[j].tool_call_id
        if id ~= nil and results[id] == nil then
          results[id] = turns[j].content
        end
        j = j + 1
      end

      local parts = {}
      if t.content and t.content ~= "" then
        parts[#parts + 1] = t.content
      end
      for _, call in ipairs(t.tool_calls) do
        local result_text = ""
        if call.id ~= nil and results[call.id] ~= nil then
          result_text = results[call.id]
        end
        parts[#parts + 1] = inline_tool_call(call, result_text)
      end
      push_or_merge_assistant(table.concat(parts, "\n"))
      i = j -- resume at the first turn after the tool run
    elseif t.role == "tool" then
      -- Orphan tool turn (no preceding asst-tool_calls captured it).
      -- Defensively drop it rather than emit a malformed message.
      i = i + 1
    elseif t.role == "assistant" then
      push_or_merge_assistant(t.content or "")
      i = i + 1
    else
      msgs[#msgs + 1] = { role = t.role, content = t.content }
      i = i + 1
    end
  end
  return msgs
end

-- Evict the oldest turns while we exceed max_turns. Turns are evicted from
-- the front as a pair (nominally user + assistant); the slice is then extended
-- over any role:"tool" turns that would otherwise be left at the head of
-- history without the assistant tool_calls turn that anchors them.
-- Returns total turns evicted. Caller is responsible for rendering the §8
-- status line.
function Context:enforce_budget()
  local turns = self.turns
  local evicted = 0

  -- The `#turns > 0` guard keeps a negative max_turns from spinning forever
  -- on an empty history.
  while #turns > self.max_turns and #turns > 0 do
    local take = math.min(2, #turns)
    while turns[take + 1] and turns[take + 1].role == "tool" do
      take = take + 1
    end

    local slice = {}
    for k = 1, take do
      slice[k] = turns[k]
    end

    -- Phase 5: ask the summarize callback (if wired) to absorb this
    -- slice into the rolling summary. Callback contract per R-B1:
    --   summarize_fn(prior_summary, evicted_turns) -> string | nil
    -- nil return → silent eviction (Phase 0 behavior). The pcall makes a
    -- failing callback behave the same as a nil return.
    if self.summarize_fn then
      local ok, new_summary = pcall(self.summarize_fn, self.summary, slice)
      if ok and type(new_summary) == "string" and new_summary ~= "" then
        self.summary = new_summary
        -- R-C1: if grown past cap, compress in a second pass.
        if #self.summary > self.max_summary_chars then
          local ok2, compressed = pcall(self.summarize_fn, self.summary, nil)
          if ok2 and type(compressed) == "string" and compressed ~= "" then
            self.summary = compressed
          end
        end
      end
    end

    -- Remove in place so anything holding a reference to self.turns sees
    -- the eviction.
    for _ = 1, take do
      table.remove(turns, 1)
    end
    evicted = evicted + take
  end

  return evicted
end

-- Coarse char/4 token estimate per §8. Phase 0 visibility only; accurate
-- tokenization is Q1 (target Phase 3). Counts the bytes of the system prompt
-- and of each stored turn's content; nothing else is included.
function Context:estimate_tokens()
  local n = #self.system_prompt
  for _, t in ipairs(self.turns) do
    n = n + #t.content
  end
  return math.floor(n / 4)
end

-- Drop all stored turns, any buffered exec output, and the rolling summary.
-- Other fields on the context are left as they are.
function Context:reset()
  self.turns = {}
  self.pending_exec_output = nil
  self.summary = nil
end

return M