7b4a9becc2
Adds the per-conversation accumulator that broker.lua's
on_delta("usage", ...) payload feeds into. No callers yet —
commit #3 wires the broker callback to ctx:add_usage in repl.lua,
commit #4 in safety.lua.
Changes:
- Context.new: new fields `usage_totals = {}` and
`cost_warn_state = { dollars = false, tokens = false }`. R4:
two independent flags so warn_at_dollars firing doesn't
suppress warn_at_tokens (or vice versa).
- Context:add_usage(model_name, category, usage):
Increments usage_totals[model_name][category] slot. R6: when
usage.cost is nil (local llama.cpp per B3), sets a sticky
`is_local = true` flag on the slot AND does NOT add to cost
(preserves the local-vs-cloud-zero distinction for :cost detail
annotation). When usage.cost is a number (cloud), accumulates.
- Context:total_cost() / total_tokens() — pure-Lua summation
across all slots; total_tokens returns (prompt, completion).
- Context:reset_usage() — explicit :cost reset path; zeros
usage_totals AND clears both flags atomically.
- Context:reset() — R8 parity: does NOT clear usage_totals OR
cost_warn_state. Matches the Phase 4 memory_items / Phase 6
project rule ("ambient context survives a user-driven
conversation reset").
Smoke verified (20/20 unit cases):
- Empty zeros; cloud cost accumulation; local nil-cost preserves
is_local=true sticky; calls counter; cost summation across
multiple cloud calls; is_local sticky after a later nil-cost
call on a cloud slot; separate slots per (model, category);
:reset preserves; :reset_usage zeros both totals and flags.
Regression: test_safety 87/87, test_router_model 31/31, repl loads.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
437 lines
19 KiB
Lua
437 lines
19 KiB
Lua
-- context.lua — in-memory conversation history + token budget.
|
|
-- Phase 0: ordered turn list, sliding-window eviction by max_turns.
|
|
-- Tokenization is char/4 heuristic in Phase 0; accurate count is Phase 3 (Q1).
|
|
-- Phase 2 (added 2026-05-12): support for `role:"tool"` turns and assistant
|
|
-- turns carrying `tool_calls = [...]`, plus a `use_tool_role` rendering
|
|
-- toggle for the strict-chat-template fallback path (Q18).
|
|
-- See docs/PHASE0.md §6, §8 and docs/PHASE2.md §3 / §5.
|
|
|
|
-- Module table: the public constructor M.new is attached to it below and the
-- table itself is what this file returns.
local M = {}
|
|
|
|
-- The §6 default system prompt. The `CMD: ` (exact prefix, single space)
|
|
-- contract is locked per §3 invariants — do not edit without amending PHASE0.
|
|
-- Phase 2 appends ~4 lines about MCP tools per PHASE2.md §8 (hybrid:
|
|
-- static frame here + dynamic tools list in the request body). The block
|
|
-- is always present even when no MCP servers are configured — the cost
|
|
-- is ~60 tokens and the model just sees instructions that don't apply.
|
|
-- Fallback system prompt: M.new uses it when opts.system_prompt is not given.
local DEFAULT_SYSTEM_PROMPT = [[
|
|
You are aish, an AI-augmented shell assistant. You help the user execute shell
|
|
commands, write and debug code, and re-engineer software. When suggesting shell
|
|
commands, output them on a line beginning with exactly "CMD: " so aish can
|
|
identify and optionally execute them. Be concise. Prefer concrete actions over
|
|
explanations unless asked.
|
|
|
|
You may have access to MCP tools — they appear in this request's `tools` field.
|
|
Call a tool by emitting a tool_call; the result will be supplied in the next
|
|
turn. Use tools for structured operations (file reads, queries, etc.) and
|
|
`CMD:` lines for local shell commands. Prefer tools when available; fall back
|
|
to `CMD:` for anything not exposed as a tool.]]
|
|
|
|
-- Method table shared by every context object; M.new installs it as the
-- metatable of the instances it returns.
local Context = {}
|
|
-- Field lookups that miss on an instance fall through to Context, which is
-- what makes the `Context:method` definitions below callable on instances.
Context.__index = Context
|
|
|
|
-- Build a new conversation context.
--   opts (table, optional): system_prompt, max_turns, token_budget,
--     use_tool_role, summarize_fn, max_summary_chars. Missing keys fall back
--     to the defaults spelled out below.
-- Returns a table whose metatable is Context.
function M.new(opts)
    opts = opts or {}

    -- Phase 2: tool-role rendering toggle. true = to_messages() emits
    -- OpenAI-standard role:"tool" messages; false = assistant+tool_calls and
    -- tool turns are collapsed into one assistant text turn for chat
    -- templates that reject the role:"tool" shape. Only an absent option
    -- selects the default (true, per PHASE2.md §12 "Q18 default"); an
    -- explicit false from the caller is honoured.
    local use_tool_role = opts.use_tool_role
    if use_tool_role == nil then
        use_tool_role = true
    end

    local ctx = {
        system_prompt = opts.system_prompt or DEFAULT_SYSTEM_PROMPT,
        turns = {},
        -- Exec output is buffered here until the next user turn (§6).
        pending_exec_output = nil,
        max_turns = opts.max_turns or 40,
        token_budget = opts.token_budget or 4096,
        use_tool_role = use_tool_role,

        -- Phase 5: summarize-on-evict. When set, enforce_budget calls
        --   summarize_fn(prior_summary, evicted_turns) -> string | nil
        -- and stores the result in ctx.summary instead of silently dropping
        -- turns. Callback contract per PHASE5.md R-B1:
        --   (nil, [turns]) -> first-time summarize
        --   (str, [turns]) -> additive: extend prior summary with new turns
        --   (str, nil)     -> compress: re-summarize the prior summary
        -- A nil return falls back to silent eviction (Phase 0 behavior).
        summarize_fn = opts.summarize_fn,
        summary = nil, -- rolling summary string
        max_summary_chars = opts.max_summary_chars or 2000,

        -- Phase 6: project file-tree block, set by repl.lua via the :tree
        -- meta command or the cfg.project.auto_tree startup hook. nil means
        -- no block is injected. Cached scan options (depth / max_chars
        -- overrides) live on _project_opts for :tree refresh.
        project = nil,
        _project_opts = nil,

        -- Phase 7 (docs/PHASE7.md): cost/usage accumulator, keyed as
        --   usage_totals[model_name][category] ->
        --     { prompt, completion, calls, cost, is_local }
        -- is_local (R6) is sticky: it is set once any usage recorded for the
        -- slot had cost == nil, which keeps "local model" distinguishable
        -- from "cloud model that cost zero" in the :cost detail view.
        -- cost_warn_state (R4) holds one one-shot flag per threshold so that
        -- warn_at_dollars firing does not suppress warn_at_tokens.
        -- Both survive :reset (R8 parity).
        usage_totals = {},
        cost_warn_state = { dollars = false, tokens = false },
    }

    return setmetatable(ctx, Context)
end
|
|
|
|
-- Append one turn to the history. What is accepted depends on the role:
--   "assistant" with a non-empty tool_calls list: content is optional
--               (stored as "" when absent) and tool_calls is kept.
--   "tool":     tool_call_id and content are both required, and the nearest
--               preceding non-tool turn must be an assistant turn with a
--               non-empty tool_calls list (debug assertion that catches
--               sub-loop bugs early, per PHASE2.md §3 / N4 in review).
--   any other role (including an assistant turn without tool_calls):
--               content is required.
-- Callers should not add "system" turns directly; the system prompt is kept
-- separately and prepended by to_messages (§6).
-- Raises (via assert) when the turn violates the rules above.
function Context:append(turn)
    assert(type(turn) == "table" and turn.role,
        "context:append requires { role = ... }")

    local role = turn.role
    local entry = { role = role, content = turn.content or "" }

    if role == "assistant" and turn.tool_calls and #turn.tool_calls > 0 then
        entry.tool_calls = turn.tool_calls
    elseif role == "tool" then
        assert(turn.tool_call_id, "context:append role=tool requires tool_call_id")
        assert(turn.content, "context:append role=tool requires content")

        -- A tool turn may directly follow the assistant turn that issued the
        -- calls, or follow sibling tool turns when several calls were made in
        -- parallel. Skip backwards over those siblings; whatever sits before
        -- them is the anchor and must be an assistant turn with tool_calls.
        local anchor = nil
        for idx = #self.turns, 1, -1 do
            local candidate = self.turns[idx]
            if candidate.role ~= "tool" then
                anchor = candidate
                break
            end
        end
        assert(anchor and anchor.role == "assistant"
            and anchor.tool_calls and #anchor.tool_calls > 0,
            "context:append role=tool must follow assistant with tool_calls "
            .. "(possibly via prior tool turns in the same sub-loop)")

        entry.tool_call_id = turn.tool_call_id
    else
        assert(turn.content, "context:append requires content for role=" .. turn.role)
    end

    self.turns[#self.turns + 1] = entry
end
|
|
|
|
-- Buffer captured shell-exec output instead of storing it as its own turn.
-- Per §6 (post user-test fix) a separate user turn would create user/user
-- back-to-back messages, which strict chat templates (e.g. mistral-nemo's
-- Jinja) reject. The text is held in pending_exec_output until :append_user
-- flushes it as a prefix of the next user turn.
--   out: captured output; nil/false/"" is ignored.
-- Successive calls accumulate, one "[exec output]" block per call, joined by
-- a newline.
function Context:append_exec_output(out)
    if not out or out == "" then
        return
    end

    local block = "[exec output]\n" .. out
    local pending = self.pending_exec_output
    if not pending then
        self.pending_exec_output = block
        return
    end
    self.pending_exec_output = pending .. "\n" .. block
end
|
|
|
|
-- Append a user turn. Any exec output buffered by :append_exec_output is
-- placed in front of the content (separated by a blank line) and the buffer
-- is cleared. Prefer this over raw :append for every role="user" turn.
function Context:append_user(content)
    local buffered = self.pending_exec_output
    if buffered then
        content = buffered .. "\n\n" .. content
        self.pending_exec_output = nil
    end
    self:append({ role = "user", content = content })
end
|
|
|
|
-- Render one tool call and its result as inline text for the fallback
-- (use_tool_role=false) path. Not an OpenAI-standard shape; it is only used
-- when a strict chat template rejects role:"tool".
--   call:           table with optional `name` and `arguments` fields.
--   result_content: tool output; nil/false renders as an empty string.
-- Returns "[tool: <name>]\n<arguments>\n[result]\n<result>", with "?" standing
-- in for a missing name.
local function inline_tool_call(call, result_content)
    local name = call.name or "?"
    local arguments = tostring(call.arguments or "")
    local result = tostring(result_content or "")
    return string.format("[tool: %s]\n%s\n[result]\n%s", name, arguments, result)
end
|
|
|
|
-- Render the messages array for broker.chat (system prompt prepended; turns
|
|
-- in order). Phase 2 adds two emission modes:
|
|
--
|
|
-- use_tool_role = true (default): pass through OpenAI-standard
|
|
-- {role:"assistant", content, tool_calls} and {role:"tool", tool_call_id,
|
|
-- content} turns unchanged.
|
|
--
|
|
-- use_tool_role = false (fallback, Q18): collapse each
|
|
-- assistant-with-tool_calls + its following role:"tool" turn(s) into a
|
|
-- single assistant text turn carrying the synthesized "[tool: name]\n
|
|
-- <args>\n[result]\n<content>" body. The role:"tool" turns and the
|
|
-- tool_calls field are NOT emitted. Same logical alternation seen by the
|
|
-- model (user → assistant → user → assistant), no strict-template breakage.
|
|
--
|
|
-- The system prompt is NOT stored in self.turns per §6.
|
|
-- Phase 4: [background] block composer. Memory items (from memory.jsonl,
-- stored on self.memory_items by repl.lua at startup) are rendered as a
-- suffix for the system prompt. to_messages skips this block while
-- norris_active is set, to avoid stacking large background contexts in
-- per-iteration broker calls (R-C1 review fold-in).
--   items: sequence of { kind = ..., content = ... } tables, or nil.
-- Returns "" for nil/empty input; otherwise two leading newlines, a header
-- line, and one "- (kind) content" line per item with newlines in the
-- content flattened to spaces. No size cap is applied here; the caller is
-- expected to have truncated the list already.
local function compose_background(items)
    if not items or #items == 0 then
        return ""
    end

    local out = { "", "", "[background] (memory.jsonl; manage via :memory)" }
    for _, item in ipairs(items) do
        -- Assigning to a single local keeps only gsub's first result (the
        -- rewritten string) and discards the substitution count.
        local flat = (item.content or ""):gsub("\n", " ")
        out[#out + 1] = string.format("- (%s) %s", item.kind or "?", flat)
    end
    return table.concat(out, "\n")
end
|
|
|
|
-- Phase 5 R-C4: summary block composer, mirroring the [background] pattern.
-- Callers already skip it under Norris; independently of that, nil or empty
-- input yields "".
-- Returns the summary text prefixed with a blank line and an
-- "[earlier conversation summary]" header.
local function compose_summary(summary_text)
    if summary_text and summary_text ~= "" then
        return "\n\n[earlier conversation summary]\n" .. summary_text
    end
    return ""
end
|
|
|
|
-- Phase 6: project file-tree composer. to_messages places this block between
-- [background] and [earlier summary], giving the reading order memory facts ->
-- project tree -> earlier conversation -> NORRIS suffix. The same Norris
-- suppression applies (callers gate on self.norris_active).
-- Returns "" for nil/empty input, otherwise the text under a "[project]"
-- header preceded by a blank line.
local function compose_project(project_text)
    if project_text and project_text ~= "" then
        return "\n\n[project]\n" .. project_text
    end
    return ""
end
|
|
|
|
-- Phase 3: NORRIS MODE suffix appended to the system prompt when
|
|
-- self.norris_active. Carries self.norris_goal so eviction of the
|
|
-- user's "[norris] goal: ..." turn doesn't lose the anchor.
|
|
-- string.format template: the single %s below is filled with self.norris_goal
-- by Context:to_messages, so any literal percent sign added here must be
-- written as %%.
local NORRIS_SUFFIX_TEMPLATE = [[
|
|
|
|
|
|
[NORRIS MODE] You are operating autonomously toward the following goal:
|
|
|
|
%s
|
|
|
|
Plan and execute step by step using CMD: lines (for shell) or tool_calls
|
|
(when MCP tools are available). After each action, you will see its
|
|
result in the next turn. Re-plan based on what you observe.
|
|
|
|
When the goal is achieved, emit a single line:
|
|
GOAL: complete
|
|
on its own line, optionally followed by a brief summary.
|
|
|
|
If the goal is unreachable or you need user input, emit:
|
|
GOAL: blocked
|
|
with a one-line reason.
|
|
|
|
Avoid destructive operations unless the goal explicitly requires them.
|
|
The user will be prompted to confirm destructive actions; expect their
|
|
verdict in the next turn as a synthesized "[aish] ... skipped by user"
|
|
message if they declined.]]
|
|
|
|
-- Render the messages array for broker.chat: one system message followed by
-- the stored turns in order. The system prompt is composed here and is never
-- stored in self.turns.
--
-- use_tool_role = true: assistant turns carrying tool_calls are emitted in
--   the OpenAI shape ({id, type="function", function={name, arguments}}) and
--   role:"tool" turns keep their tool_call_id.
--
-- use_tool_role = false (fallback for strict chat templates): each
--   assistant-with-tool_calls turn and the run of role:"tool" turns that
--   follows it are collapsed into one assistant text turn; neither the
--   tool_calls field nor role:"tool" messages are emitted, and consecutive
--   assistant texts are merged so no assistant/assistant pair is produced.
--
-- Returns a fresh array of { role, content, ... } tables.
function Context:to_messages()
    local sys_content = self.system_prompt

    -- Phase 4 [background] + Phase 6 [project] + Phase 5 [earlier summary].
    -- All three are suppressed during Norris (R-C1 / R-C4) so the planner
    -- does not pay for them on every iteration.
    if not self.norris_active then
        sys_content = sys_content .. compose_background(self.memory_items)
        sys_content = sys_content .. compose_project(self.project)
        sys_content = sys_content .. compose_summary(self.summary)
    end

    -- Phase 3 NORRIS MODE suffix. Appended last so its instructions dominate.
    if self.norris_active and self.norris_goal then
        sys_content = sys_content
            .. string.format(NORRIS_SUFFIX_TEMPLATE, self.norris_goal)
    end

    local msgs = { { role = "system", content = sys_content } }

    if self.use_tool_role then
        for _, t in ipairs(self.turns) do
            local m = { role = t.role, content = t.content }
            if t.role == "assistant" and t.tool_calls then
                -- OpenAI shape wraps each call as
                -- {id, type:"function", function:{name, arguments}}.
                local oai = {}
                for i, c in ipairs(t.tool_calls) do
                    oai[i] = {
                        id = c.id,
                        type = "function",
                        ["function"] = {
                            name = c.name,
                            arguments = c.arguments or "",
                        },
                    }
                end
                m.tool_calls = oai
            elseif t.role == "tool" then
                m.tool_call_id = t.tool_call_id
            end
            msgs[#msgs + 1] = m
        end
        return msgs
    end

    -- Fallback path. Assistant text is merged into a preceding assistant
    -- message so the post-tool-result assistant text does not produce
    -- asst/asst back-to-back (strict templates reject that just like the
    -- user/user case PHASE0.md §6 warns about).
    local function push_or_merge_assistant(content)
        local last = msgs[#msgs]
        if last and last.role == "assistant" then
            last.content = last.content .. "\n" .. content
        else
            msgs[#msgs + 1] = { role = "assistant", content = content }
        end
    end

    local turns = self.turns
    local i = 1
    while i <= #turns do
        local t = turns[i]
        if t.role == "assistant" and t.tool_calls then
            -- Find the run of tool turns that actually follows this turn.
            -- There may be fewer of them than tool_calls (e.g. an aborted
            -- sub-loop), so the cursor must advance past the tool turns that
            -- exist, not past #tool_calls slots; otherwise the next
            -- user/assistant turn would be skipped.
            local after = i + 1
            local by_id = {}
            while turns[after] and turns[after].role == "tool" do
                local id = turns[after].tool_call_id
                if id ~= nil and by_id[id] == nil then
                    by_id[id] = turns[after].content
                end
                after = after + 1
            end

            local parts = {}
            if t.content and t.content ~= "" then
                parts[#parts + 1] = t.content
            end
            for ci, call in ipairs(t.tool_calls) do
                local result_text
                local positional = turns[i + ci]
                if i + ci < after and positional.tool_call_id == call.id then
                    -- Common case: results stored in call order.
                    result_text = positional.content
                else
                    -- Results stored out of order: match by id instead.
                    result_text = (call.id ~= nil and by_id[call.id]) or ""
                end
                parts[#parts + 1] = inline_tool_call(call, result_text)
            end
            push_or_merge_assistant(table.concat(parts, "\n"))
            i = after
        elseif t.role == "tool" then
            -- Orphan tool turn (no preceding assistant-with-tool_calls
            -- consumed it): drop it rather than emit a malformed message.
            i = i + 1
        elseif t.role == "assistant" then
            push_or_merge_assistant(t.content or "")
            i = i + 1
        else
            msgs[#msgs + 1] = { role = t.role, content = t.content }
            i = i + 1
        end
    end
    return msgs
end
|
|
|
|
-- Evict the oldest turns while the history holds more than max_turns turns.
-- Returns the total number of turns evicted; the caller is responsible for
-- rendering the §8 status line.
--
-- Each pass removes a slice made of the two oldest turns (normally a
-- user + assistant pair) plus any role:"tool" turns directly behind them.
-- Those tool turns answer an assistant turn inside the slice; leaving them at
-- the head of the history would orphan them: to_messages would emit a
-- role:"tool" message with no assistant tool_calls before it, and a later
-- tool :append would fail its anchor assertion.
--
-- Phase 5: when summarize_fn is set, it is offered each slice before removal
-- (contract per R-B1: summarize_fn(prior_summary, evicted_turns) ->
-- string | nil). A failed call or a nil/empty/non-string result leaves the
-- summary untouched, i.e. silent eviction as in Phase 0.
function Context:enforce_budget()
    local evicted = 0

    -- The `> 0` guard keeps a negative max_turns from spinning forever on an
    -- empty history.
    while #self.turns > 0 and #self.turns > self.max_turns do
        local turns = self.turns

        -- Size of the slice: up to two turns, extended over trailing tool
        -- turns that would otherwise lose their assistant anchor.
        local slice_len = math.min(2, #turns)
        while turns[slice_len + 1] and turns[slice_len + 1].role == "tool" do
            slice_len = slice_len + 1
        end

        local slice = {}
        for k = 1, slice_len do
            slice[k] = turns[k]
        end

        if self.summarize_fn then
            local ok, new_summary = pcall(self.summarize_fn, self.summary, slice)
            if ok and type(new_summary) == "string" and new_summary ~= "" then
                self.summary = new_summary
                -- R-C1: if the summary has grown past the cap, compress it
                -- in a second pass (prior summary only, no turns).
                if #self.summary > self.max_summary_chars then
                    local ok2, compressed = pcall(self.summarize_fn,
                        self.summary, nil)
                    if ok2 and type(compressed) == "string"
                        and compressed ~= "" then
                        self.summary = compressed
                    end
                end
            end
        end

        for _ = 1, slice_len do
            table.remove(turns, 1)
        end
        evicted = evicted + slice_len
    end

    return evicted
end
|
|
|
|
-- Coarse token estimate per §8: total byte length of the base system prompt
-- and of every stored turn's content, divided by four and rounded down.
-- Phase 0 visibility only; accurate tokenization is Q1 (target Phase 3).
-- Blocks that to_messages appends to the system prompt and tool_calls
-- payloads are not counted.
function Context:estimate_tokens()
    local chars = #self.system_prompt
    for _, turn in ipairs(self.turns) do
        chars = chars + #turn.content
    end
    return math.floor(chars / 4)
end
|
|
|
|
-- Phase 7: cost/usage accumulator helpers.
|
|
--
|
|
-- Context:add_usage(model_name, category, usage)
--   Add one call's usage to the (model, category) slot of usage_totals.
--   model_name: slot key; defaults to "?" when nil.
--   category:   sub-key; defaults to "main" when nil.
--   usage:      payload from broker.lua's on_delta("usage", ...):
--               { prompt_tokens, completion_tokens, total_tokens,
--                 cost (nil for local per R6), model, category }.
--   The model_name + category arguments are used rather than the payload's
--   own fields because the caller may want to normalize the key (e.g. key by
--   req_cfg alias rather than model_cfg.model).
--
--   A non-table usage (for instance nil when no usage was reported) is
--   ignored: nothing is recorded and no call is counted. Token and cost
--   fields are coerced with tonumber; values that are not numeric count as 0
--   instead of raising an arithmetic error.
function Context:add_usage(model_name, category, usage)
    if type(usage) ~= "table" then
        return
    end

    model_name = model_name or "?"
    category = category or "main"
    self.usage_totals = self.usage_totals or {}

    local by_category = self.usage_totals[model_name]
    if not by_category then
        by_category = {}
        self.usage_totals[model_name] = by_category
    end

    local slot = by_category[category]
    if not slot then
        slot = {
            prompt = 0, completion = 0, calls = 0, cost = 0,
            -- R6: sticky flag; set once any nil-cost usage lands here.
            is_local = false,
        }
        by_category[category] = slot
    end

    slot.prompt = slot.prompt + (tonumber(usage.prompt_tokens) or 0)
    slot.completion = slot.completion + (tonumber(usage.completion_tokens) or 0)
    slot.calls = slot.calls + 1

    if usage.cost == nil then
        -- Missing cost marks the slot as local and adds nothing, which
        -- preserves the local-vs-cloud-zero distinction per R6.
        slot.is_local = true
    else
        slot.cost = slot.cost + (tonumber(usage.cost) or 0)
    end
end
|
|
|
|
-- Sum the `cost` field of every (model, category) slot in usage_totals.
-- Slots without a cost count as 0; returns 0 when nothing has been recorded.
function Context:total_cost()
    local sum = 0
    local totals = self.usage_totals
    if not totals then
        return sum
    end
    for _, by_category in pairs(totals) do
        for _, slot in pairs(by_category) do
            sum = sum + (slot.cost or 0)
        end
    end
    return sum
end
|
|
|
|
-- Sum token counts over every (model, category) slot in usage_totals.
-- Returns two values: total prompt tokens, total completion tokens.
-- Missing fields count as 0; both results are 0 when nothing was recorded.
function Context:total_tokens()
    local prompt_sum, completion_sum = 0, 0
    local totals = self.usage_totals
    if not totals then
        return prompt_sum, completion_sum
    end
    for _, by_category in pairs(totals) do
        for _, slot in pairs(by_category) do
            prompt_sum = prompt_sum + (slot.prompt or 0)
            completion_sum = completion_sum + (slot.completion or 0)
        end
    end
    return prompt_sum, completion_sum
end
|
|
|
|
-- :cost reset path. Replaces the usage accumulator with an empty table and
-- re-arms both per-threshold one-shot warning flags.
function Context:reset_usage()
    self.usage_totals, self.cost_warn_state =
        {}, { dollars = false, tokens = false }
end
|
|
|
|
-- Conversation reset: drops the stored turns, any buffered exec output and
-- the rolling summary.
-- R8 parity: usage_totals and cost_warn_state are deliberately left alone
-- (same rule as memory_items and project: "ambient context survives a
-- user-driven conversation reset"). Call :reset_usage to zero the cost meter
-- explicitly.
function Context:reset()
    self.turns, self.pending_exec_output, self.summary = {}, nil, nil
end
|
|
|
|
-- Export the module table; M.new is the only function attached to it in this
-- file.
return M
|