16490e6905
User-test surfaced the bug: with `deep` (mistral-nemo-12b) active,
running `list files` -> y on `CMD: ls` -> `Are there directory entries
beginning with "lor"?` returned a Jinja exception:
api: ... Error: Jinja Exception: After the optional system message,
conversation roles must alternate user/assistant/user/assistant/...
Cause: §6 specified "exec output injected into context uses role 'user'
with a prefix tag '[exec output]'." This works for permissive templates
(qwen2.5-coder-1.5b, the `fast` preset) but produces a back-to-back
user/user pair on strict templates that enforce the OpenAI alternation
contract — `[exec output]` user turn followed by the user's actual
follow-up question.
Fix:
context.lua:
- new field `pending_exec_output` (initially nil)
- new method `:append_exec_output(out)` buffers the captured output
(concatenating on subsequent captures, so several shell runs before the
next AI turn are still merged into one prefix)
- new method `:append_user(content)` flushes buffered exec output as
a `[exec output]\n...\n\n` prefix and appends a user turn
- `:reset()` also clears the buffer
repl.lua:
- run_shell calls ctx:append_exec_output(out) instead of
ctx:append({role="user", content="[exec output]\n"..out})
- ask_ai calls ctx:append_user(text) instead of raw :append; saves
prev_pending so a broker error can restore the buffer for retry
PHASE0.md §6:
- amended the role-injection paragraph to describe the buffer-and-
prepend policy; the §3 invariants list is untouched (this was a §6
design detail, not a locked invariant)
Verification:
- context unit tests cover: alternation after the failing sequence,
multi-shell merge, reset clears buffer, broker-error retry path
- live reproduction against `deep` (mistral-nemo) of the exact
user-reported sequence succeeds; model responds with a sensible
`CMD: ls | grep '^lor'` instead of a Jinja exception
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
104 lines
3.7 KiB
Lua
104 lines
3.7 KiB
Lua
-- context.lua — in-memory conversation history + token budget.
|
|
-- Phase 0: ordered turn list, sliding-window eviction by max_turns.
|
|
-- Tokenization is char/4 heuristic in Phase 0; accurate count is Phase 3 (Q1).
|
|
-- See docs/PHASE0.md §6, §8.
|
|
|
|
local M = {}
|
|
|
|
-- The §6 default system prompt. The `CMD: ` (exact prefix, single space)
|
|
-- contract is locked per §3 invariants — do not edit without amending PHASE0.
-- M.new falls back to this text whenever the caller passes no
-- `system_prompt` option.
-- Lua skips a newline that directly follows the opening `[[`, so the literal
-- does not begin with a blank line.
|
|
local DEFAULT_SYSTEM_PROMPT = [[
|
|
You are aish, an AI-augmented shell assistant. You help the user execute shell
|
|
commands, write and debug code, and re-engineer software. When suggesting shell
|
|
commands, output them on a line beginning with exactly "CMD: " so aish can
|
|
identify and optionally execute them. Be concise. Prefer concrete actions over
|
|
explanations unless asked.]]
|
|
|
|
-- Prototype table for context instances. Instances get it as their metatable,
-- and its `__index` self-reference makes the methods defined on it reachable
-- from every instance.
local Context = {}
Context.__index = Context

-- Build a fresh, empty conversation context.
--   opts (table, optional) may carry:
--     system_prompt  overrides DEFAULT_SYSTEM_PROMPT
--     max_turns      overrides the default of 40 (read by :enforce_budget)
--     token_budget   overrides the default of 4096 (stored on the instance;
--                    no method in this file reads it)
-- Returns the new instance.
function M.new(opts)
  local cfg = opts or {}
  local instance = {
    system_prompt = cfg.system_prompt or DEFAULT_SYSTEM_PROMPT,
    max_turns = cfg.max_turns or 40,
    token_budget = cfg.token_budget or 4096,
    turns = {},
  }
  -- Exec output is buffered here until the next user turn (§6); a new
  -- context starts with nothing buffered.
  instance.pending_exec_output = nil
  return setmetatable(instance, Context)
end
|
|
|
|
-- Push one turn onto the end of the history.
--   turn (table) must have truthy `role` and `content` fields; anything else
--   trips the assertion below.
-- Only `role` and `content` are copied into a new table, so later changes to
-- the caller's table (or extra fields on it) do not reach the stored turn.
function Context:append(turn)
  local well_formed = type(turn) == "table" and turn.role and turn.content
  assert(well_formed, "context:append requires { role = ..., content = ... }")

  local history = self.turns
  local stored = { role = turn.role, content = turn.content }
  history[#history + 1] = stored
end
|
|
|
|
-- Hold captured shell-exec output until the next user turn. Per §6 (post
-- user-test fix) exec output never becomes a user turn of its own: strict chat
-- templates (e.g. mistral-nemo's Jinja) reject the user/user pair that would
-- produce. :append_user later prepends whatever is buffered here.
--   out: the captured output; nil, false and "" are ignored.
-- Each capture is tagged with its own "[exec output]" header line, and
-- successive captures are joined by a single newline.
function Context:append_exec_output(out)
  local has_output = out and out ~= ""
  if not has_output then
    return
  end

  local tagged = "[exec output]\n" .. out
  local held = self.pending_exec_output
  -- `held .. "\n" .. tagged` is always a string (truthy), so the and/or
  -- form cannot fall through to the wrong branch.
  self.pending_exec_output = held and (held .. "\n" .. tagged) or tagged
end
|
|
|
|
-- Record a user turn, with any buffered exec output placed in front of it
-- (separated from the user's text by a blank line). Prefer this over calling
-- :append directly for turns whose role is "user".
--   content: the user's text for this turn.
function Context:append_user(content)
  local buffered = self.pending_exec_output
  local text = content
  if buffered then
    -- Concatenate first, clear second: if the concatenation raises, the
    -- buffer is still intact.
    text = buffered .. "\n\n" .. content
    self.pending_exec_output = nil
  end
  self:append({ role = "user", content = text })
end
|
|
|
|
-- Produce the messages array for broker.chat: the system prompt as the first
-- entry, followed by every stored turn in order. The system prompt is not
-- kept in self.turns (§6), so it is added here at render time.
-- Returns a new array of fresh { role, content } tables; the stored turn
-- tables themselves are not handed out.
function Context:to_messages()
  local rendered = {}
  rendered[1] = { role = "system", content = self.system_prompt }
  for position, turn in ipairs(self.turns) do
    -- Slot 1 holds the system message, so turn i lands at i + 1.
    rendered[position + 1] = { role = turn.role, content = turn.content }
  end
  return rendered
end
|
|
|
|
-- Evict the oldest turns, two at a time (intended as a user + assistant
-- pair), until the history no longer exceeds max_turns. Removing in pairs
-- means the count may end up one below max_turns rather than exactly at it.
-- If only one turn is left when a pass starts, just that one is removed.
--
-- The loop also stops once the history is empty. Without that guard a
-- negative max_turns keeps the loop condition true forever, because
-- table.remove on an empty table removes nothing.
--
-- Returns the number of turns actually removed (0 when nothing was over the
-- limit). Caller is responsible for rendering the §8 status line.
function Context:enforce_budget()
  local turns = self.turns
  local evicted = 0
  while #turns > self.max_turns and #turns > 0 do
    -- One pass drops the head turn and, when present, the turn after it.
    for _ = 1, math.min(2, #turns) do
      table.remove(turns, 1)
      evicted = evicted + 1
    end
  end
  return evicted
end
|
|
|
|
-- Rough token estimate per §8: total byte length of the system prompt plus
-- every turn's content, divided by four and rounded down. Phase 0 visibility
-- only; accurate tokenization is Q1 (target Phase 3).
-- Buffered exec output that has not yet been flushed into a turn is not
-- counted.
function Context:estimate_tokens()
  local total_chars = #self.system_prompt
  for _, turn in ipairs(self.turns) do
    total_chars = total_chars + #turn.content
  end
  local estimate = math.floor(total_chars / 4)
  return estimate
end
|
|
|
|
-- Forget the whole conversation: drop any exec output still waiting for a
-- user turn and start over with an empty history. The history is replaced by
-- a new table rather than emptied in place. system_prompt, max_turns and
-- token_budget are left as they are.
function Context:reset()
  self.pending_exec_output = nil
  self.turns = {}
end
|
|
|
|
return M
|