context: cost/usage accumulator (Phase 7 commit #2)
Adds the per-conversation accumulator that broker.lua's
on_delta("usage", ...) payload feeds into. No callers yet —
commit #3 wires the broker callback to ctx:add_usage in repl.lua,
and commit #4 does the same in safety.lua.
Changes:
- Context.new: new fields `usage_totals = {}` and
`cost_warn_state = { dollars = false, tokens = false }`. R4:
two independent flags so warn_at_dollars firing doesn't
suppress warn_at_tokens (or vice versa).
- Context:add_usage(model_name, category, usage):
Increments the usage_totals[model_name][category] slot. R6: when
usage.cost is nil (local llama.cpp per B3), sets a sticky
`is_local = true` flag on the slot AND does NOT add to cost
(this preserves the local-vs-cloud-zero distinction for the :cost
detail annotation). When usage.cost is a number (cloud), it is accumulated.
- Context:total_cost() / total_tokens() — pure-Lua summation
across all slots; total_tokens returns (prompt, completion).
- Context:reset_usage() — explicit :cost reset path; zeros
usage_totals AND clears both flags atomically.
- Context:reset() — R8 parity: does NOT clear usage_totals OR
cost_warn_state. Matches the Phase 4 memory_items / Phase 6
project rule ("ambient context survives a user-driven
conversation reset").
Smoke verified (20/20 unit cases):
- Empty zeros; cloud cost accumulation; local nil-cost preserves
is_local=true sticky; calls counter; cost summation across
multiple cloud calls; is_local sticky after a later nil-cost
call on a cloud slot; separate slots per (model, category);
:reset preserves; :reset_usage zeros both totals and flags.
Regression: test_safety 87/87, test_router_model 31/31, repl loads.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+71
@@ -62,6 +62,16 @@ function M.new(opts)
|
|||||||
-- max_chars overrides) live on _project_opts for :tree refresh.
|
-- max_chars overrides) live on _project_opts for :tree refresh.
|
||||||
project = nil,
|
project = nil,
|
||||||
_project_opts = nil,
|
_project_opts = nil,
|
||||||
|
-- Phase 7 (docs/PHASE7.md): cost/usage accumulator. Keyed as
|
||||||
|
-- usage_totals[model_name][category] -> { prompt, completion,
|
||||||
|
-- calls, cost, is_local }. is_local (R6) is a sticky flag
|
||||||
|
-- set when ANY recorded usage for the slot had cost==nil
|
||||||
|
-- (preserves local-vs-cloud-zero distinction for :cost detail
|
||||||
|
-- annotation). cost_warn_state (R4) carries per-threshold
|
||||||
|
-- one-shot flags so warn_at_dollars firing doesn't suppress
|
||||||
|
-- warn_at_tokens. Both survive :reset (R8 parity).
|
||||||
|
usage_totals = {},
|
||||||
|
cost_warn_state = { dollars = false, tokens = false },
|
||||||
}, Context)
|
}, Context)
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -356,10 +366,71 @@ function Context:estimate_tokens()
|
|||||||
return math.floor(n / 4)
|
return math.floor(n / 4)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
-- Phase 7: cost/usage accumulator helpers.
|
||||||
|
--
|
||||||
|
-- Context:add_usage(model_name, category, usage)
--   Record one model call in the (model_name, category) slot of
--   self.usage_totals, creating the slot on first use.
--
--   model_name  slot key; falls back to "?" when nil/false.
--   category    slot key; falls back to "main" when nil/false.
--   usage       payload from broker.lua's on_delta("usage", ...):
--               { prompt_tokens, completion_tokens, total_tokens,
--                 cost (nil for local per R6), model, category }.
--
--   The model_name + category args are used as keys (not the payload
--   fields) because the caller may want to normalize (e.g., key by
--   req_cfg alias rather than model_cfg.model).
--
--   A missing payload (usage == nil/false) is ignored: there is nothing
--   to record, and the accumulator must not raise on it. All new values
--   are computed before anything is written, so a malformed field
--   (e.g. a non-numeric cost) raises without leaving the slot half
--   updated.
function Context:add_usage(model_name, category, usage)
  if not usage then
    return
  end

  model_name = model_name or "?"
  category = category or "main"

  local totals = self.usage_totals or {}
  local by_category = totals[model_name] or {}
  local slot = by_category[category] or {
    prompt = 0, completion = 0, calls = 0, cost = 0,
    -- R6: sticky flag; set once any nil-cost usage lands here.
    is_local = false,
  }

  -- Compute first: any arithmetic error surfaces before state changes.
  local prompt = slot.prompt + (usage.prompt_tokens or 0)
  local completion = slot.completion + (usage.completion_tokens or 0)
  local calls = slot.calls + 1
  local cost = slot.cost
  local saw_nil_cost = usage.cost == nil
  if not saw_nil_cost then
    cost = cost + usage.cost
  end

  -- Commit.
  slot.prompt = prompt
  slot.completion = completion
  slot.calls = calls
  slot.cost = cost
  if saw_nil_cost then
    -- Nil cost is not added (cost stays as-is) and marks the slot as
    -- local, preserving the local-vs-cloud-zero distinction per R6.
    slot.is_local = true
  end

  by_category[category] = slot
  totals[model_name] = by_category
  self.usage_totals = totals
end
|
||||||
|
|
||||||
|
-- Context:total_cost()
--   Sum of the `cost` field over every (model, category) slot in
--   self.usage_totals. Slots without a cost contribute 0; returns 0
--   when nothing has been recorded.
function Context:total_cost()
  local by_model = self.usage_totals
  if not by_model then
    return 0
  end

  local sum = 0
  for _, by_category in pairs(by_model) do
    for _, slot in pairs(by_category) do
      sum = sum + (slot.cost or 0)
    end
  end
  return sum
end
|
||||||
|
|
||||||
|
-- Context:total_tokens()
--   Walks every (model, category) slot in self.usage_totals and returns
--   two values: total prompt tokens, then total completion tokens.
--   Missing fields count as 0; an absent accumulator yields 0, 0.
function Context:total_tokens()
  local prompt_sum, completion_sum = 0, 0
  local by_model = self.usage_totals
  if not by_model then
    return prompt_sum, completion_sum
  end

  for _, by_category in pairs(by_model) do
    for _, slot in pairs(by_category) do
      prompt_sum = prompt_sum + (slot.prompt or 0)
      completion_sum = completion_sum + (slot.completion or 0)
    end
  end
  return prompt_sum, completion_sum
end
|
||||||
|
|
||||||
|
-- Context:reset_usage()
--   The explicit ":cost reset" path. Replaces the accumulator with an
--   empty table and re-arms both per-threshold one-shot warning flags
--   (dollars and tokens) by installing a fresh cost_warn_state.
function Context:reset_usage()
  local fresh_totals = {}
  local fresh_flags = { dollars = false, tokens = false }
  self.usage_totals = fresh_totals
  self.cost_warn_state = fresh_flags
end
|
||||||
|
|
||||||
-- Context:reset()
--   Drops the recorded turns, the summary and any pending exec output.
--
--   R8 parity: usage_totals and cost_warn_state are deliberately NOT
--   touched here (same rule as memory_items + project — "ambient
--   context survives a user-driven conversation reset"). Call
--   :reset_usage to zero the cost meter explicitly.
function Context:reset()
  self.summary = nil
  self.pending_exec_output = nil
  self.turns = {}
end
|
||||||
|
|
||||||
return M
|
return M
|
||||||
|
|||||||
Reference in New Issue
Block a user