-- context.lua — in-memory conversation history + token budget.
-- Phase 0: ordered turn list, sliding-window eviction by max_turns.
-- Tokenization is a char/4 heuristic in Phase 0; an accurate count is
-- Phase 3 (Q1).
-- Phase 2 (added 2026-05-12): support for `role:"tool"` turns and assistant
-- turns carrying `tool_calls = [...]`, plus a `use_tool_role` rendering
-- toggle for the strict-chat-template fallback path (Q18).
-- See docs/PHASE0.md §6, §8 and docs/PHASE2.md §3 / §5.

local M = {}

-- The §6 default system prompt. The `CMD: ` (exact prefix, single space)
-- contract is locked per §3 invariants — do not edit without amending PHASE0.
-- Phase 2 appends ~4 lines about MCP tools per PHASE2.md §8 (hybrid:
-- static frame here + dynamic tools list in the request body). The block
-- is always present even when no MCP servers are configured — the cost
-- is ~60 tokens and the model just sees instructions that don't apply.
-- NOTE(review): the wording below is verbatim, but the line breaks inside
-- the literal were reconstructed (one sentence per line) — confirm against
-- docs/PHASE0.md §6 / docs/PHASE2.md §8 before relying on exact bytes.
local DEFAULT_SYSTEM_PROMPT = [[
You are aish, an AI-augmented shell assistant.
You help the user execute shell commands, write and debug code, and re-engineer software.
When suggesting shell commands, output them on a line beginning with exactly "CMD: " so aish can identify and optionally execute them.
Be concise. Prefer concrete actions over explanations unless asked.
You may have access to MCP tools — they appear in this request's `tools` field.
Call a tool by emitting a tool_call; the result will be supplied in the next turn.
Use tools for structured operations (file reads, queries, etc.) and `CMD:` lines for local shell commands.
Prefer tools when available; fall back to `CMD:` for anything not exposed as a tool.]]

local Context = {}
Context.__index = Context

--- Create a new conversation context.
-- @param opts optional table; recognised fields are `system_prompt`,
--   `max_turns` (default 40), `token_budget` (default 4096) and
--   `use_tool_role` (default true).
-- @return a table with the Context metatable attached
function M.new(opts)
  local cfg = opts or {}

  -- Phase 2: tool-role rendering toggle. true = emit OpenAI-standard
  -- role:"tool" messages from to_messages(); false = collapse
  -- assistant+tool_calls and tool turns into a single assistant text turn
  -- for chat templates that reject the role:"tool" shape.
  -- Default true per PHASE2.md §12 "Q18 default"; only an absent option
  -- selects the default, so an explicit `false` from the caller survives.
  local use_tool_role = cfg.use_tool_role
  if use_tool_role == nil then
    use_tool_role = true
  end

  local instance = {
    system_prompt = cfg.system_prompt or DEFAULT_SYSTEM_PROMPT,
    turns = {},
    pending_exec_output = nil, -- buffered until next user turn (§6)
    max_turns = cfg.max_turns or 40,
    token_budget = cfg.token_budget or 4096,
    use_tool_role = use_tool_role,
  }
  return setmetatable(instance, Context)
end

--- Append a turn to the history. Phase 2 widens what is valid:
--   role="user"      content required
--   role="system"    content required (callers shouldn't add system turns
--                    directly; the system prompt is stored separately and
--                    prepended at to_messages time per §6)
--   role="assistant" content may be omitted IF tool_calls is non-empty;
--                    otherwise content required
--   role="tool"      tool_call_id + content required; the nearest preceding
--                    non-tool turn must be an assistant turn with non-empty
--                    tool_calls (debug assertion catches sub-loop bugs early
--                    per PHASE2.md §3 row + N4 in review)
-- Only presence of `content` is asserted here, not its type.
-- @param turn table with at least a `role` field
function Context:append(turn)
  assert(type(turn) == "table" and turn.role,
    "context:append requires { role = ... }")

  local role = turn.role
  local calls = turn.tool_calls
  local entry = { role = role, content = turn.content or "" }

  if role == "assistant" and calls and #calls > 0 then
    -- Content is optional here; it was already defaulted to "" above.
    entry.tool_calls = calls
  elseif role == "tool" then
    assert(turn.tool_call_id, "context:append role=tool requires tool_call_id")
    assert(turn.content, "context:append role=tool requires content")

    -- A tool turn may follow either an assistant-with-tool_calls (the first
    -- reply in the sub-loop) or another tool turn (subsequent replies when
    -- the assistant emitted multiple parallel tool_calls). Step back over
    -- the trailing tool turns; whatever precedes them is the anchor.
    local history = self.turns
    local idx = #history
    while idx > 0 and history[idx].role == "tool" do
      idx = idx - 1
    end
    local anchor = history[idx] -- nil when idx reached 0

    assert(anchor and anchor.role == "assistant"
        and anchor.tool_calls and #anchor.tool_calls > 0,
      "context:append role=tool must follow assistant with tool_calls "
        .. "(possibly via prior tool turns in the same sub-loop)")
    entry.tool_call_id = turn.tool_call_id
  else
    assert(turn.content, "context:append requires content for role=" .. turn.role)
  end

  self.turns[#self.turns + 1] = entry
end

--- Buffer captured shell-exec output.
-- Per §6 (post user-test fix), exec output is NOT appended as its own user
-- turn — strict chat templates (e.g. mistral-nemo's Jinja) reject the
-- resulting user/user back-to-back. Instead it is held until the next user
-- turn arrives, then prepended via :append_user. Successive calls accumulate,
-- separated by a newline. A nil/false or empty `out` is ignored.
-- @param out captured output text
function Context:append_exec_output(out)
  if out and out ~= "" then
    local block = "[exec output]\n" .. out
    local pending = self.pending_exec_output
    if pending then
      block = pending .. "\n" .. block
    end
    self.pending_exec_output = block
  end
end

-- Append a user turn, flushing any pending exec output as a prefix. Use this
-- (rather than raw :append) for any turn whose role is "user".
--- Append a user turn, prefixing any buffered exec output.
-- When `self.pending_exec_output` is set, it is placed before `content`
-- (separated by a blank line) and the buffer is cleared.
-- @param content text of the user turn
function Context:append_user(content)
  if self.pending_exec_output then
    content = self.pending_exec_output .. "\n\n" .. content
    self.pending_exec_output = nil
  end
  self:append({ role = "user", content = content })
end

-- Compact text rendering used by the fallback (use_tool_role=false) path
-- to convert a tool call + its result into inline text. Not OpenAI-
-- standard — only used when a strict chat template rejects role:"tool".
-- Missing name renders as "?", missing arguments/result as "".
local function inline_tool_call(call, result_content)
  return ("[tool: %s]\n%s\n[result]\n%s")
    :format(call.name or "?",
            tostring(call.arguments or ""),
            tostring(result_content or ""))
end

--- Render the messages array for broker.chat (system prompt first, then the
-- stored turns in order). The system prompt is NOT stored in self.turns
-- per §6. Phase 2 adds two emission modes:
--
--   use_tool_role = true (default): emit OpenAI-standard
--     {role:"assistant", content, tool_calls} and
--     {role:"tool", tool_call_id, content} messages; each stored call is
--     wrapped as {id, type:"function", function:{name, arguments}}.
--
--   use_tool_role = false (fallback, Q18): collapse each
--     assistant-with-tool_calls turn plus the role:"tool" turn(s) that
--     directly follow it into a single assistant text message whose body is
--     "[tool: <name>]\n<arguments>\n[result]\n<result content>" per call.
--     The role:"tool" turns and the tool_calls field are NOT emitted, so the
--     model sees plain user → assistant alternation.
--
-- In the fallback, results are matched to calls by tool_call_id (not by
-- position), and only the tool turns that actually follow the assistant
-- turn are consumed. A call with no recorded result renders an empty result
-- section; no non-tool turn is ever skipped.
-- @return array of message tables
function Context:to_messages()
  local msgs = { { role = "system", content = self.system_prompt } }
  local turns = self.turns

  if self.use_tool_role then
    for _, t in ipairs(turns) do
      local m = { role = t.role, content = t.content }
      if t.role == "assistant" and t.tool_calls then
        -- OpenAI shape wraps each call as
        -- {id, type:"function", function:{name, arguments}}.
        local oai = {}
        for i, c in ipairs(t.tool_calls) do
          oai[i] = {
            id = c.id,
            type = "function",
            ["function"] = { name = c.name, arguments = c.arguments or "" },
          }
        end
        m.tool_calls = oai
      elseif t.role == "tool" then
        m.tool_call_id = t.tool_call_id
      end
      msgs[#msgs + 1] = m
    end
    return msgs
  end

  -- Fallback path. Consecutive assistant outputs are merged so the trailing
  -- post-tool-result assistant text doesn't produce asst/asst back-to-back
  -- (which strict templates would also reject — same gotcha PHASE0.md §6
  -- warned about for user/user).
  local function push_or_merge_assistant(content)
    local last = msgs[#msgs]
    if last and last.role == "assistant" then
      last.content = last.content .. "\n" .. content
    else
      msgs[#msgs + 1] = { role = "assistant", content = content }
    end
  end

  local i = 1
  while i <= #turns do
    local t = turns[i]
    if t.role == "assistant" and t.tool_calls then
      -- Index the run of tool turns that really follows this turn. The run
      -- may be shorter than #t.tool_calls (a call never got a result) or in
      -- a different order than the calls were issued.
      local results = {}
      local j = i + 1
      while turns[j] and turns[j].role == "tool" do
        local id = turns[j].tool_call_id
        if id ~= nil and results[id] == nil then
          results[id] = turns[j].content
        end
        j = j + 1
      end

      local parts = {}
      if t.content and t.content ~= "" then
        parts[#parts + 1] = t.content
      end
      for _, call in ipairs(t.tool_calls) do
        local result_text = ""
        if call.id ~= nil and results[call.id] ~= nil then
          result_text = results[call.id]
        end
        parts[#parts + 1] = inline_tool_call(call, result_text)
      end
      push_or_merge_assistant(table.concat(parts, "\n"))

      -- Resume at the first turn after the tool run, never beyond it.
      i = j
    elseif t.role == "tool" then
      -- Orphan tool turn (no preceding asst-tool_calls captured it).
      -- Defensively drop it rather than emit a malformed message.
      i = i + 1
    elseif t.role == "assistant" then
      push_or_merge_assistant(t.content or "")
      i = i + 1
    else
      msgs[#msgs + 1] = { role = t.role, content = t.content }
      i = i + 1
    end
  end
  return msgs
end

-- Evict the oldest pair (user + assistant) while we exceed max_turns. Returns
-- total turns evicted. Caller is responsible for rendering the §8 status line.
--- Sliding-window eviction by turn count.
-- While the history holds more than `self.max_turns` turns, the two oldest
-- turns are removed (one, if only one is left). After each such step any
-- role:"tool" turns now sitting at the head of the history are removed too:
-- their anchoring assistant-with-tool_calls turn has just been evicted, and
-- a tool turn with no anchor would be rendered by to_messages as a tool
-- message that answers nothing.
-- `self.token_budget` is not consulted here.
-- @return total number of turns removed (orphaned tool turns included)
function Context:enforce_budget()
  local turns = self.turns
  local evicted = 0

  while #turns > self.max_turns do
    -- Oldest pair.
    table.remove(turns, 1)
    evicted = evicted + 1
    if #turns > 0 then
      table.remove(turns, 1)
      evicted = evicted + 1
    end

    -- Tool replies whose assistant anchor was just evicted.
    while turns[1] and turns[1].role == "tool" do
      table.remove(turns, 1)
      evicted = evicted + 1
    end
  end

  return evicted
end

--- Coarse char/4 token estimate per §8. Phase 0 visibility only; accurate
-- tokenization is Q1 (target Phase 3). Sums the byte length of the system
-- prompt and of every stored turn's `content`; tool_calls payloads are not
-- included.
-- @return estimated token count (integer, rounded down)
function Context:estimate_tokens()
  local n = #self.system_prompt
  for _, t in ipairs(self.turns) do
    n = n + #t.content
  end
  return math.floor(n / 4)
end

--- Clear the stored turns and any buffered exec output. The system prompt
-- and the configured limits are left untouched.
function Context:reset()
  self.turns = {}
  self.pending_exec_output = nil
end

return M