From a3c1813465d28bec3aa6b43269721b93a1113cf5 Mon Sep 17 00:00:00 2001 From: Markus Fritsche Date: Sun, 17 May 2026 09:20:56 +0000 Subject: [PATCH] context: proactive periodic summarization (closes #101) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #101 (FR-A from the 2026-05-17 German strategy analysis, small-model improvement strategy 5: "History-Zusammenfassung via local"). Phase 5 summarize-on-evict only fires at budget pressure — exactly when the local model is already suffering. Small models benefit from tight context from turn 1, not "after eviction". This commit adds CADENCE-triggered summarization that fires every N appends regardless of budget, folding turns older than `summarize_keep_recent` into ctx.summary via the existing Phase 5 summarize_fn closure. context.lua additions: - New ctx fields: summarize_every_n_turns, summarize_keep_recent (default 4), _turns_since_summarize (counter). - Context:append bumps the counter on every store. - Context:enforce_cadence — the new entry point. Returns the number of turns folded (0 on no-op). Guards: * disabled (cfg unset OR summarize_fn unset) -> 0 * not yet due (_turns_since_summarize < N) -> 0 * Norris-active (Phase 5 R-C4 parity — planner stays on goal) -> 0 * #turns <= keep_recent (nothing to fold) -> 0 * summarize_fn returns nil/empty -> 0 (defer to enforce_budget later) Orphan-tool guard: when the fold slice would end on an assistant-with-tool_calls, peel back the right edge until the next live turn isn't role=tool. Strict chat templates reject tool-without-assistant-anchor (#87 already encountered this). - If ctx.summary grows past max_summary_chars after the fold, compress in a second pass (same shape as enforce_budget's Phase 5 logic). repl.lua wiring: - ctx_opts continues to copy all config.context keys; the new summarize_every_n_turns / summarize_keep_recent fields flow through automatically. 
- make_summarize_fn is now wired when EITHER summarize_on_evict OR summarize_every_n_turns is set (same closure, different trigger — Phase 5's #51 #issue eviction path uses it on budget; #101 uses it on cadence). - New status_cadence_fold helper: "[aish] proactively summarized N older turns". - ask_ai's existing enforce_budget call site now first fires enforce_cadence, then enforce_budget. Cadence comes first so the token estimate enforce_budget sees is the tighter post-fold one — no spurious eviction of turns we just summarized. - Norris path NOT wired: enforce_cadence is a no-op there via the norris_active guard (consistent with Phase 5 R-C4). 18 inline unit cases for enforce_cadence: - cfg disabled / no summarize_fn / below cadence -> 0 - cadence met -> exact fold count (N - keep) - summary contains folded contents; first/last live turn IDs match - cadence counter resets; second fold fires after another N appends - Norris-active -> suppressed - orphan-tool guard: peels back when last folded = asst+tool_calls - summary compression triggers when over max_summary_chars E2E verified on hossenfelder:8082, summarize_every_n_turns=4 / summarize_keep_recent=2: 5 user turns -> 2 cadence fires: [aish] proactively summarized 2 older turns [aish] proactively summarized 4 older turns :cost detail shows main=5 calls, summarize=2 calls (matches fires). Estimated ctx token count: 180 (vs ~1000 unsummarized). Flag-off path: no status, identical to pre-#101 behavior. Regression: 87/87 safety, 31/31 router_model, repl loads. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- context.lua | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++ repl.lua | 18 ++++++++++++- 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/context.lua b/context.lua index 97d50a8..4f2d50c 100644 --- a/context.lua +++ b/context.lua @@ -56,6 +56,17 @@ function M.new(opts) summarize_fn = opts.summarize_fn, summary = nil, -- rolling summary string max_summary_chars = opts.max_summary_chars or 2000, + -- #101: proactive periodic summarization (cadence-triggered, + -- in addition to Phase 5's eviction-triggered path). When + -- summarize_every_n_turns is set AND summarize_fn is wired, + -- enforce_cadence() folds turns older than the last + -- summarize_keep_recent into ctx.summary every N appends. + -- Goal: keep the wire prompt tight from the start so small + -- local models aren't fed near-budget context until eviction + -- forces a fold. nil = disabled (existing behavior). + summarize_every_n_turns = opts.summarize_every_n_turns, + summarize_keep_recent = opts.summarize_keep_recent or 4, + _turns_since_summarize = 0, -- Phase 6 (#issue Phase 6 §6): project file-tree block, set by -- repl.lua via :tree meta or the cfg.project.auto_tree startup -- hook. nil = no block injected. Cached scan opts (depth / @@ -117,6 +128,8 @@ function Context:append(turn) assert(turn.content, "context:append requires content for role=" .. turn.role) end self.turns[#self.turns + 1] = stored + -- #101: bump cadence counter so enforce_cadence knows when to fire. + self._turns_since_summarize = (self._turns_since_summarize or 0) + 1 end -- Buffer captured shell-exec output. Per §6 (post user-test fix), exec output @@ -388,6 +401,66 @@ function Context:to_messages(opts) return msgs end +-- #101: proactive periodic summarization. Fires every +-- summarize_every_n_turns appends, folding turns older than the last +-- summarize_keep_recent into ctx.summary via summarize_fn. 
-- Orphan-tool guard for Context:enforce_cadence. Given the turn list and
-- a proposed number of leading turns to fold, pull the boundary left
-- until cutting after turns[fold_count] is chat-template-legal:
--   * the first live turn must not be role=tool (it would have lost its
--     assistant anchor, or be separated from sibling tool results), and
--   * the last folded turn must not be an assistant carrying tool_calls
--     (its results may not have been appended yet).
-- Returns the adjusted count, possibly 0.
local function cadence_fold_boundary(turns, fold_count)
    while fold_count > 0 do
        local last = turns[fold_count]
        local next_live = turns[fold_count + 1]

        local splits_tool_group = next_live ~= nil and next_live.role == "tool"

        local dangling_call = false
        if last and last.role == "assistant" and last.tool_calls then
            dangling_call = #last.tool_calls > 0
        end

        if not (splits_tool_group or dangling_call) then
            break
        end
        fold_count = fold_count - 1
    end
    return fold_count
end

-- #101: proactive periodic summarization. Once at least
-- summarize_every_n_turns appends have accumulated since the last
-- successful fold, folds every turn older than the last
-- summarize_keep_recent into self.summary via self.summarize_fn.
--
-- Returns the number of turns folded. Returns 0 when:
--   * Norris mode is active,
--   * summarize_fn or summarize_every_n_turns is unset (or the latter
--     is not numeric),
--   * the cadence counter has not reached the threshold yet,
--   * there are no turns older than the keep window,
--   * the orphan-tool guard leaves nothing to fold, or
--   * summarize_fn raised, or returned anything but a non-empty string.
-- In the failure case the turns and the counter are left untouched, so
-- the fold is retried on the next call and budget eviction can still
-- handle the turns later.
--
-- Norris suppression (Phase 5 R-C4 parity): the planner stays focused
-- on its goal anchor; folding history mid-loop would change its
-- perceived progress.
--
-- Orphan-tool guard: an assistant-with-tool_calls turn is never folded
-- apart from its role=tool turn(s), and the live window never starts on
-- a role=tool turn. The live window may therefore be temporarily larger
-- than summarize_keep_recent, but it stays chat-template-legal.
function Context:enforce_cadence()
    if self.norris_active then return 0 end
    if not self.summarize_fn then return 0 end

    -- tonumber() tolerates numeric strings from config and rejects
    -- stray non-numeric values instead of raising on the comparison.
    local every = tonumber(self.summarize_every_n_turns)
    if not every then return 0 end
    if (self._turns_since_summarize or 0) < every then
        return 0
    end

    -- A negative or fractional keep window would make the fold count
    -- exceed the turn list or index it with a non-integer.
    local keep = tonumber(self.summarize_keep_recent) or 4
    keep = math.max(0, math.floor(keep))

    local turns = self.turns
    local n = #turns
    if n <= keep then return 0 end

    local fold_count = cadence_fold_boundary(turns, n - keep)
    if fold_count == 0 then return 0 end

    local folded = {}
    for i = 1, fold_count do folded[i] = turns[i] end

    local ok, new_summary = pcall(self.summarize_fn, self.summary, folded)
    if not ok or type(new_summary) ~= "string" or new_summary == "" then
        return 0 -- failure: leave turns; eviction will handle them later
    end
    self.summary = new_summary

    -- Second pass: ask summarize_fn to compress the summary itself when
    -- it outgrew the cap. Best effort; the longer summary is kept if
    -- the compression call fails.
    local cap = self.max_summary_chars
    if cap and #self.summary > cap then
        local ok2, compressed = pcall(self.summarize_fn, self.summary, nil)
        if ok2 and type(compressed) == "string" and compressed ~= "" then
            self.summary = compressed
        end
    end

    -- Drop the folded prefix in place so self.turns keeps its identity.
    for _ = 1, fold_count do table.remove(turns, 1) end
    self._turns_since_summarize = 0
    return fold_count
end
-- #101: report a cadence-triggered fold on the status line.
-- `n` is the count returned by Context:enforce_cadence(); a nil or
-- non-positive count means nothing was folded and nothing is printed.
local function status_cadence_fold(n)
    if not (n and n > 0) then
        return
    end
    local message = string.format("proactively summarized %d older turns", n)
    renderer.status(message)
end