-- aish/context.lua

-- context.lua — in-memory conversation history + token budget.
-- Phase 0: ordered turn list, sliding-window eviction by max_turns.
-- Tokenization is char/4 heuristic in Phase 0; accurate count is Phase 3 (Q1).
-- Phase 2 (added 2026-05-12): support for `role:"tool"` turns and assistant
-- turns carrying `tool_calls = [...]`, plus a `use_tool_role` rendering
-- toggle for the strict-chat-template fallback path (Q18).
-- See docs/PHASE0.md §6, §8 and docs/PHASE2.md §3 / §5.

-- Module table; this is the value the file returns, and M.new is attached
-- to it below.
local M = {}

-- The §6 default system prompt. The `CMD: ` (exact prefix, single space)
-- contract is locked per §3 invariants — do not edit without amending PHASE0.
-- Phase 2 appends ~4 lines about MCP tools per PHASE2.md §8 (hybrid:
-- static frame here + dynamic tools list in the request body). The block
-- is always present even when no MCP servers are configured — the cost
-- is ~60 tokens and the model just sees instructions that don't apply.
--
-- Lua drops a newline that immediately follows the opening long bracket, so
-- the stored string starts at "You are aish" and, because the closing
-- bracket sits on the last text line, ends without a trailing newline.
local DEFAULT_SYSTEM_PROMPT = [[
You are aish, an AI-augmented shell assistant. You help the user execute shell
commands, write and debug code, and re-engineer software. When suggesting shell
commands, output them on a line beginning with exactly "CMD: " so aish can
identify and optionally execute them. Be concise. Prefer concrete actions over
explanations unless asked.

You may have access to MCP tools — they appear in this request's `tools` field.
Call a tool by emitting a tool_call; the result will be supplied in the next
turn. Use tools for structured operations (file reads, queries, etc.) and
`CMD:` lines for local shell commands. Prefer tools when available; fall back
to `CMD:` for anything not exposed as a tool.]]

-- Method table shared by every context object. It doubles as the metatable
-- passed to setmetatable in M.new; pointing __index at itself makes method
-- lookups on an instance fall through to the functions defined on Context.
local Context = {}
Context.__index = Context

-- Build a new, empty conversation context.
--
-- opts (table, optional) may carry any of:
--   system_prompt  replaces DEFAULT_SYSTEM_PROMPT
--   max_turns      sliding-window size, default 40
--   token_budget   default 4096
--   use_tool_role  Phase 2 tool-role rendering toggle (see below)
--
-- Returns a table whose metatable is Context.
function M.new(opts)
    local cfg = opts or {}

    -- Phase 2: tool-role rendering toggle. true = emit OpenAI-standard
    -- role:"tool" messages from to_messages(); false = collapse
    -- assistant+tool_calls and tool turns into a single assistant text
    -- turn for chat templates that reject the role:"tool" shape.
    -- Default true per PHASE2.md §12 "Q18 default"; flip from caller.
    -- Only nil counts as "not given", so an explicit false is kept.
    local tool_role = cfg.use_tool_role
    if tool_role == nil then
        tool_role = true
    end

    local ctx = {
        system_prompt       = cfg.system_prompt or DEFAULT_SYSTEM_PROMPT,
        turns               = {},
        pending_exec_output = nil,   -- buffered until next user turn (§6)
        max_turns           = cfg.max_turns    or 40,
        token_budget        = cfg.token_budget or 4096,
        use_tool_role       = tool_role,
    }
    return setmetatable(ctx, Context)
end

-- Validate a turn and push a normalized copy onto self.turns.
--
-- Accepted shapes (Phase 2 widened the set):
--   role="user"      content required
--   role="system"    content required (callers shouldn't add system turns
--                    directly; the system prompt is stored separately and
--                    prepended at to_messages time per §6)
--   role="assistant" content may be omitted IF tool_calls is non-empty, in
--                    which case "" is stored; otherwise content is required
--   role="tool"      tool_call_id and content required; skipping back over
--                    any immediately preceding tool turns, the stored turn
--                    found must be an assistant turn with non-empty
--                    tool_calls (debug assertion catches sub-loop bugs early
--                    per PHASE2.md §3 row + N4 in review)
--
-- Only role, content, tool_calls and tool_call_id are copied into the stored
-- turn. Raises (via assert) when a requirement above is not met.
function Context:append(turn)
    assert(type(turn) == "table" and turn.role,
           "context:append requires { role = ... }")

    local role  = turn.role
    local calls = turn.tool_calls
    local entry = { role = role, content = turn.content or "" }

    if role == "assistant" and calls and #calls > 0 then
        entry.tool_calls = calls
    elseif role == "tool" then
        assert(turn.tool_call_id, "context:append role=tool requires tool_call_id")
        assert(turn.content, "context:append role=tool requires content")

        -- A tool turn either answers the assistant turn directly or follows
        -- earlier tool turns of the same batch of parallel tool_calls, so
        -- search backwards for the newest turn that is not itself a tool
        -- turn. No such turn leaves `anchor` nil and the assert below fails.
        local history = self.turns
        local anchor
        for idx = #history, 1, -1 do
            local candidate = history[idx]
            if candidate.role ~= "tool" then
                anchor = candidate
                break
            end
        end
        assert(anchor and anchor.role == "assistant"
                      and anchor.tool_calls and #anchor.tool_calls > 0,
               "context:append role=tool must follow assistant with tool_calls "
               .. "(possibly via prior tool turns in the same sub-loop)")
        entry.tool_call_id = turn.tool_call_id
    else
        assert(turn.content, "context:append requires content for role=" .. role)
    end

    table.insert(self.turns, entry)
end

-- Stash captured shell-exec output until the next user turn. Per §6 (post
-- user-test fix) it is NOT stored as its own user turn, because strict chat
-- templates (e.g. mistral-nemo's Jinja) reject the user/user back-to-back
-- that would result; :append_user prepends the buffer instead.
--
-- nil, false and "" are ignored. Each accepted chunk is tagged with an
-- "[exec output]" header line, and successive chunks are joined by a single
-- newline.
function Context:append_exec_output(out)
    if out and out ~= "" then
        local chunk   = "[exec output]\n" .. out
        local pending = self.pending_exec_output
        if pending then
            chunk = pending .. "\n" .. chunk
        end
        self.pending_exec_output = chunk
    end
end

-- Record a user turn. Any exec output buffered by :append_exec_output is put
-- in front of `content`, separated by a blank line, and the buffer is then
-- cleared. Prefer this over raw :append for every turn whose role is "user".
function Context:append_user(content)
    local body    = content
    local pending = self.pending_exec_output
    if pending then
        body = pending .. "\n\n" .. content
        self.pending_exec_output = nil
    end
    self:append({ role = "user", content = body })
end

-- Text rendering of one tool call plus its result, used only by the
-- use_tool_role=false fallback when a strict chat template rejects
-- role:"tool". Not OpenAI-standard. Layout:
--
--   [tool: <name>]
--   <arguments>
--   [result]
--   <result_content>
--
-- A missing name renders as "?"; missing arguments or result render as an
-- empty string. Arguments and result are passed through tostring.
local function inline_tool_call(call, result_content)
    local name   = call.name or "?"
    local args   = tostring(call.arguments or "")
    local result = tostring(result_content or "")
    return string.format("[tool: %s]\n%s\n[result]\n%s", name, args, result)
end

-- Render the messages array for broker.chat (system prompt prepended; turns
-- in order). The system prompt is NOT stored in self.turns per §6. Stored
-- turns are never modified; every emitted message is a fresh table.
--
-- Phase 2 has two emission modes:
--
--   use_tool_role = true (default): emit OpenAI-standard messages. Assistant
--     turns with tool_calls get each call wrapped as
--     {id, type:"function", function:{name, arguments}}; role:"tool" turns
--     carry their tool_call_id.
--
--   use_tool_role = false (fallback, Q18): collapse each
--     assistant-with-tool_calls plus the run of role:"tool" turns directly
--     after it into a single assistant text turn carrying the synthesized
--     "[tool: name]\n<args>\n[result]\n<content>" body per call. The
--     role:"tool" turns and the tool_calls field are NOT emitted, so the
--     model sees plain user → assistant alternation.
--
--     Results are paired with calls by tool_call_id, so they may arrive in
--     any order; a call with no recorded result renders with an empty result
--     body. Only the tool turns that are actually present are consumed —
--     a missing result never causes a following user/assistant turn to be
--     skipped.
--
-- Returns the list of message tables, system message first.
function Context:to_messages()
    local msgs  = { { role = "system", content = self.system_prompt } }
    local turns = self.turns

    if self.use_tool_role then
        for _, t in ipairs(turns) do
            local m = { role = t.role, content = t.content }
            if t.role == "assistant" and t.tool_calls then
                -- OpenAI shape wraps each call as
                -- {id, type:"function", function:{name, arguments}}.
                local oai = {}
                for k, c in ipairs(t.tool_calls) do
                    oai[k] = {
                        id   = c.id,
                        type = "function",
                        ["function"] = { name = c.name,
                                         arguments = c.arguments or "" },
                    }
                end
                m.tool_calls = oai
            elseif t.role == "tool" then
                m.tool_call_id = t.tool_call_id
            end
            msgs[#msgs + 1] = m
        end
        return msgs
    end

    -- Fallback path. Consecutive assistant outputs are merged so the
    -- post-tool-result assistant text doesn't produce asst/asst back-to-back
    -- (which strict templates would also reject — same gotcha PHASE0.md §6
    -- warned about for user/user).
    local function push_or_merge_assistant(content)
        local last = msgs[#msgs]
        if last and last.role == "assistant" then
            last.content = last.content .. "\n" .. content
        else
            msgs[#msgs + 1] = { role = "assistant", content = content }
        end
    end

    local i, n = 1, #turns
    while i <= n do
        local t = turns[i]
        if t.role == "assistant" and t.tool_calls then
            -- Index the contiguous run of tool turns that follows by
            -- tool_call_id. `j` ends on the first turn that is NOT a tool
            -- turn, which is exactly where the outer walk must resume.
            local results = {}
            local j = i + 1
            while j <= n and turns[j].role == "tool" do
                local id = turns[j].tool_call_id
                if id ~= nil and results[id] == nil then
                    results[id] = turns[j].content
                end
                j = j + 1
            end

            local parts = {}
            if t.content and t.content ~= "" then
                parts[#parts + 1] = t.content
            end
            for _, call in ipairs(t.tool_calls) do
                local result_text = ""
                if call.id ~= nil and results[call.id] ~= nil then
                    result_text = results[call.id]
                end
                parts[#parts + 1] = inline_tool_call(call, result_text)
            end
            push_or_merge_assistant(table.concat(parts, "\n"))
            i = j
        elseif t.role == "tool" then
            -- Orphan tool turn (no preceding asst-tool_calls captured it).
            -- Shouldn't happen given the :append assertion, but defensively
            -- drop it rather than emit a malformed message.
            i = i + 1
        elseif t.role == "assistant" then
            push_or_merge_assistant(t.content or "")
            i = i + 1
        else
            msgs[#msgs + 1] = { role = t.role, content = t.content }
            i = i + 1
        end
    end
    return msgs
end

-- Slide the window: while more than max_turns turns are stored, evict the
-- two oldest turns (the user + assistant pair in a plain conversation).
--
-- After each pair is removed, any role:"tool" turns now sitting at the head
-- of the list are evicted too. Their anchoring assistant tool_calls turn has
-- just been removed, and a tool result without its call is rejected by
-- OpenAI-style chat endpoints (and would be dropped as an orphan by the
-- to_messages fallback anyway).
--
-- NOTE(review): an assistant turn can still end up first in the window when
-- a tool sub-loop straddles the eviction boundary — confirm whether strict
-- templates need the window to start on a user turn.
--
-- Returns the total number of turns evicted, orphaned tool turns included.
-- Caller is responsible for rendering the §8 status line.
function Context:enforce_budget()
    local turns = self.turns
    -- A negative max_turns can never be satisfied by removing turns; clamp
    -- it so the loop ends once the list is empty instead of spinning.
    local limit = math.max(self.max_turns, 0)
    local evicted = 0

    local function drop_oldest()
        table.remove(turns, 1)
        evicted = evicted + 1
    end

    while #turns > limit do
        -- #turns > limit >= 0 here, so at least one turn exists.
        drop_oldest()
        if #turns > 0 then
            drop_oldest()
        end
        while turns[1] and turns[1].role == "tool" do
            drop_oldest()
        end
    end
    return evicted
end

-- Coarse char/4 token estimate per §8. Phase 0 visibility only; accurate
-- tokenization is Q1 (target Phase 3).
--
-- Counts the bytes of the system prompt and of every stored turn's content.
-- For assistant turns that carry tool_calls, the name and arguments of each
-- call are counted as well when they are strings: those turns are stored
-- with empty content, yet to_messages sends the call payload, so leaving it
-- out would score such turns as zero tokens. Buffered exec output is not
-- counted because it is not part of any turn yet.
--
-- Returns floor(total_bytes / 4).
function Context:estimate_tokens()
    local chars = #self.system_prompt
    for _, t in ipairs(self.turns) do
        chars = chars + #t.content
        if t.tool_calls then
            for _, call in ipairs(t.tool_calls) do
                if type(call.name) == "string" then
                    chars = chars + #call.name
                end
                if type(call.arguments) == "string" then
                    chars = chars + #call.arguments
                end
            end
        end
    end
    return math.floor(chars / 4)
end

-- Start the conversation over: discard any buffered exec output and replace
-- the turn list with a fresh empty table. No other field of the context is
-- touched.
function Context:reset()
    self.pending_exec_output = nil
    self.turns = {}
end

-- Export the module table. Context itself stays private to this file;
-- M.new is the only function attached to M here.
return M