From 16490e6905380f1c1913e1344747275496fbe331 Mon Sep 17 00:00:00 2001
From: Markus Fritsche <mfritsche@reauktion.de>
Date: Sun, 10 May 2026 18:41:21 +0000
Subject: [PATCH] fix: buffer exec output for next user turn; alternation for
 strict templates
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

User-test surfaced the bug: with `deep` (mistral-nemo-12b) active,
running `list files` -> y on `CMD: ls` -> `Are there directory entries
beginning with "lor"?` returned a Jinja exception:

    api: ... Error: Jinja Exception: After the optional system message,
    conversation roles must alternate user/assistant/user/assistant/...

Cause: §6 specified "exec output injected into context uses role 'user'
with a prefix tag '[exec output]'." This works for permissive templates
(qwen2.5-coder-1.5b, the `fast` preset) but produces a back-to-back
user/user pair on strict templates that enforce the OpenAI alternation
contract — `[exec output]` user turn followed by the user's actual
follow-up question.

Fix:

context.lua:
  - new field `pending_exec_output` (initially nil)
  - new method `:append_exec_output(out)` buffers the output (nil or empty
    output is ignored); later captures are concatenated, so several shell
    runs before the next AI turn are all merged into the same buffer
  - new method `:append_user(content)` flushes buffered exec output as
    a `[exec output]\n...\n\n` prefix and appends a user turn
  - `:reset()` also clears the buffer

repl.lua:
  - run_shell calls ctx:append_exec_output(out) instead of
    ctx:append({role="user", content="[exec output]\n"..out})
  - ask_ai calls ctx:append_user(text) instead of raw :append; saves
    prev_pending so a broker error can restore the buffer for retry

PHASE0.md §6:
  - amended the role-injection paragraph to describe the buffer-and-
    prepend policy; the §3 invariants list is untouched (this was a §6
    design detail, not a locked invariant)

Verification:
  - context unit tests cover: alternation after the failing sequence,
    multi-shell merge, reset clears buffer, broker-error retry path
  - live reproduction against `deep` (mistral-nemo) of the exact
    user-reported sequence succeeds; model responds with a sensible
    `CMD: ls | grep '^lor'` instead of a Jinja exception

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 context.lua    | 34 ++++++++++++++++++++++++++++++----
 docs/PHASE0.md |  4 +++-
 repl.lua       | 14 +++++++++-----
 3 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/context.lua b/context.lua
index b00e955..e52fdbd 100644
--- a/context.lua
+++ b/context.lua
@@ -20,10 +20,11 @@ Context.__index = Context
 function M.new(opts)
     opts = opts or {}
     return setmetatable({
-        system_prompt = opts.system_prompt or DEFAULT_SYSTEM_PROMPT,
-        turns         = {},
-        max_turns     = opts.max_turns    or 40,
-        token_budget  = opts.token_budget or 4096,
+        system_prompt        = opts.system_prompt or DEFAULT_SYSTEM_PROMPT,
+        turns                = {},
+        pending_exec_output  = nil,   -- buffered until next user turn (§6)
+        max_turns            = opts.max_turns    or 40,
+        token_budget         = opts.token_budget or 4096,
     }, Context)
 end
 
@@ -33,6 +34,30 @@ function Context:append(turn)
     self.turns[#self.turns + 1] = { role = turn.role, content = turn.content }
 end
 
+-- Buffer captured shell-exec output. Per §6 (post user-test fix), exec output
+-- is NOT appended as its own user turn — strict chat templates (e.g. mistral-
+-- nemo's Jinja) reject the resulting user/user back-to-back. Instead it is
+-- held until the next user turn arrives, then prepended via :append_user.
+function Context:append_exec_output(out)
+    if not out or out == "" then return end
+    local block = "[exec output]\n" .. out
+    if self.pending_exec_output then
+        self.pending_exec_output = self.pending_exec_output .. "\n" .. block
+    else
+        self.pending_exec_output = block
+    end
+end
+
+-- Append a user turn, flushing any pending exec output as a prefix. Use this
+-- (rather than raw :append) for any turn whose role is "user".
+function Context:append_user(content)
+    if self.pending_exec_output then
+        content = self.pending_exec_output .. "\n\n" .. content
+        self.pending_exec_output = nil
+    end
+    self:append({ role = "user", content = content })
+end
+
 -- Render the messages array for broker.chat (system prompt prepended; turns
 -- in order). The system prompt is NOT stored in self.turns per §6.
 function Context:to_messages()
@@ -72,6 +97,7 @@ end
 
 function Context:reset()
     self.turns = {}
+    self.pending_exec_output = nil
 end
 
 return M
diff --git a/docs/PHASE0.md b/docs/PHASE0.md
index 942cb65..16db635 100644
--- a/docs/PHASE0.md
+++ b/docs/PHASE0.md
@@ -141,7 +141,9 @@ Each turn is stored in `context.lua` as:
 { role = "system" | "user" | "assistant", content = "..." }
 ```
 
-The system prompt is prepended on every request and is not stored as a history turn. Exec output injected into context uses role `"user"` with a prefix tag `[exec output]`.
+The system prompt is prepended on every request and is not stored as a history turn.
+
+**Exec output injection.** Captured shell-exec output is **not** appended as its own user turn — that produces a back-to-back user/user pair, which strict chat templates (e.g. `mistral-nemo-instruct`'s Jinja) reject with `roles must alternate user/assistant/...`. Instead, exec output is buffered on the context and prepended to the **next** user turn, with each captured block tagged `[exec output]`. Output from multiple shell calls between AI turns is concatenated in the buffer. `:reset()` also clears the buffer. The user-visible behavior is unchanged; only the role alternation seen by the broker differs.
 
 ### System prompt (Phase 0 default)
 
diff --git a/repl.lua b/repl.lua
index e10c5f0..d76b2f8 100644
--- a/repl.lua
+++ b/repl.lua
@@ -46,7 +46,10 @@ function M.run(config)
     end
 
     -- Run a shell command, framing output and (per config.shell.capture_output)
-    -- injecting it back into context as a `[exec output]`-tagged user turn.
+    -- buffering it for the NEXT user turn — context.append_exec_output keeps
+    -- a [exec output] block pending until ask_ai flushes it via append_user.
+    -- Direct user-role injection violated chat-template alternation (mistral-
+    -- nemo's Jinja rejects user/user back-to-back); see PHASE0.md §6.
     local function run_shell(cmd)
         local chd, err = executor.maybe_chdir(cmd)
         if chd ~= nil then
@@ -61,19 +64,20 @@ function M.run(config)
         local out, code = executor.exec(cmd)
         renderer.exec_output(out, code)
         if config.shell and config.shell.capture_output then
-            ctx:append({ role = "user", content = "[exec output]\n" .. out })
-            status_evictions(ctx:enforce_budget())
+            ctx:append_exec_output(out)
         end
     end
 
     -- Send user text to the active model, render the response, and (per
     -- §6 + config.shell.confirm_cmd) optionally execute extracted CMD: lines.
     local function ask_ai(text)
-        ctx:append({ role = "user", content = text })
+        local prev_pending = ctx.pending_exec_output
+        ctx:append_user(text)  -- flushes any pending [exec output] as prefix
         local resp, err = broker.chat(active_cfg, ctx:to_messages())
         if not resp then
             renderer.status("broker error: " .. tostring(err))
-            table.remove(ctx.turns)  -- back out the user turn we just added
+            table.remove(ctx.turns)              -- back out the merged user turn
+            ctx.pending_exec_output = prev_pending  -- restore buffered exec output
             return
         end
         ctx:append({ role = "assistant", content = resp })