From 7b4a9becc2e1fa81df027fe2234cf89b1a67b168 Mon Sep 17 00:00:00 2001
From: Markus Fritsche <mfritsche@reauktion.de>
Date: Sat, 16 May 2026 22:57:56 +0000
Subject: [PATCH] context: cost/usage accumulator (Phase 7 commit #2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds the per-conversation accumulator that broker.lua's
on_delta("usage", ...) payload feeds into. No callers yet —
commit #3 wires the broker callback to ctx:add_usage in repl.lua,
commit #4 in safety.lua.

Changes:

- Context.new: new fields `usage_totals = {}` and
  `cost_warn_state = { dollars = false, tokens = false }`. R4:
  two independent flags so warn_at_dollars firing doesn't
  suppress warn_at_tokens (or vice versa).

- Context:add_usage(model_name, category, usage):
  Increments the usage_totals[model_name][category] slot. R6: when
  usage.cost is nil (local llama.cpp per B3), sets a sticky
  `is_local = true` flag on the slot AND does NOT add to cost
  (preserves the local-vs-cloud-zero distinction for :cost detail
  annotation). When usage.cost is a number (cloud), adds it to cost.

- Context:total_cost() / total_tokens() — pure-Lua summation
  across all slots; total_tokens returns (prompt, completion).

- Context:reset_usage() — explicit :cost reset path; zeros
  usage_totals AND clears both flags atomically.

- Context:reset() — R8 parity: does NOT clear usage_totals OR
  cost_warn_state. Matches the Phase 4 memory_items / Phase 6
  project rule ("ambient context survives a user-driven
  conversation reset").

Smoke verified (20/20 unit cases):
  - Empty zeros; cloud cost accumulation; local nil-cost preserves
    is_local=true sticky; calls counter; cost summation across
    multiple cloud calls; is_local sticky after a later nil-cost
    call on a cloud slot; separate slots per (model, category);
    :reset preserves; :reset_usage zeros both totals and flags.

Regression: test_safety 87/87, test_router_model 31/31, repl loads.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 context.lua | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)

diff --git a/context.lua b/context.lua
index 391d413..ffa02e0 100644
--- a/context.lua
+++ b/context.lua
@@ -62,6 +62,16 @@ function M.new(opts)
         -- max_chars overrides) live on _project_opts for :tree refresh.
         project              = nil,
         _project_opts        = nil,
+        -- Phase 7 (docs/PHASE7.md): cost/usage accumulator. Keyed as
+        -- usage_totals[model_name][category] -> { prompt, completion,
+        -- calls, cost, is_local }. is_local (R6) is a sticky flag
+        -- set when ANY recorded usage for the slot had cost==nil
+        -- (preserves local-vs-cloud-zero distinction for :cost detail
+        -- annotation). cost_warn_state (R4) carries per-threshold
+        -- one-shot flags so warn_at_dollars firing doesn't suppress
+        -- warn_at_tokens. Both survive :reset (R8 parity).
+        usage_totals         = {},
+        cost_warn_state      = { dollars = false, tokens = false },
     }, Context)
 end
 
@@ -356,10 +366,71 @@ function Context:estimate_tokens()
     return math.floor(n / 4)
 end
 
+-- Phase 7: cost/usage accumulator helpers.
+--
+-- Context:add_usage(model_name, category, usage)
+--   Add one call's usage to the (model_name, category) slot, keyed by
+--   the args (defaults "?" / "main") rather than the payload fields so
+--   callers can normalize (e.g. key by req_cfg alias). usage is the
+--   broker.lua on_delta("usage", ...) payload. A non-table usage is
+--   ignored. Fields go through tonumber(), so a cost that is nil or
+--   non-numeric sets the sticky is_local flag (R6) and adds no cost.
+function Context:add_usage(model_name, category, usage)
+    if type(usage) ~= "table" then return end  -- nothing to record
+    model_name = model_name or "?"
+    category   = category   or "main"
+    self.usage_totals = self.usage_totals or {}
+    local m = self.usage_totals[model_name] or {}
+    local c = m[category] or {
+        prompt = 0, completion = 0, calls = 0, cost = 0, is_local = false,
+    }
+    c.prompt     = c.prompt     + (tonumber(usage.prompt_tokens) or 0)
+    c.completion = c.completion + (tonumber(usage.completion_tokens) or 0)
+    c.calls      = c.calls      + 1
+    local cost = tonumber(usage.cost)
+    if cost == nil then
+        c.is_local = true   -- preserves local-vs-cloud-zero per R6
+    else
+        c.cost = c.cost + cost
+    end
+    m[category] = c
+    self.usage_totals[model_name] = m
+end
+
+function Context:total_cost()
+    local sum = 0  -- running cost over every (model, category) slot
+    for _, slots in pairs(self.usage_totals or {}) do
+        for _, slot in pairs(slots) do sum = sum + (slot.cost or 0) end
+    end
+    return sum
+end
+
+-- Sums token counts over every slot; returns (prompt, completion).
+function Context:total_tokens()
+    local prompt_sum, completion_sum = 0, 0
+    for _, slots in pairs(self.usage_totals or {}) do
+        for _, slot in pairs(slots) do
+            prompt_sum     = prompt_sum     + (slot.prompt     or 0)
+            completion_sum = completion_sum + (slot.completion or 0)
+        end
+    end
+    return prompt_sum, completion_sum
+end
+
+-- Explicit :cost reset: drop all usage slots and clear both warn flags.
+function Context:reset_usage()
+    self.cost_warn_state = { dollars = false, tokens = false }
+    self.usage_totals    = {}
+end
+
 function Context:reset()
     self.turns = {}
     self.pending_exec_output = nil
     self.summary = nil
+    -- R8 parity: usage_totals and cost_warn_state are deliberately left
+    -- untouched here, like memory_items and project ("ambient context
+    -- survives a user-driven conversation reset"). Call :reset_usage
+    -- to zero the cost meter and clear its warning flags explicitly.
 end
 
 return M