repl: _record_usage helper + opts.category at 5 sites (Phase 7 commit #3)
Wires broker.lua's on_delta("usage", payload) and broker.chat's
(text, usage) return to the ctx accumulator via a single chokepoint.
Changes:
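- Forward-declare `local _record_usage` next to _bg_spawn, using the
same pattern: the summarize-on-evict closure in make_summarize_fn
(built at line 299) needs lexical access to _record_usage, which is
only assigned at line 695. The later assignment therefore omits
`local`, so it fills the forward-declared variable instead of
creating a new one that the earlier closure could not see.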
- _record_usage(model, category, usage) — R5 central chokepoint:
routes to ctx:add_usage, then checks the per-threshold warn
state. R4: cost_warn_state has two independent flags (dollars
and tokens) so first-to-fire doesn't suppress the other. R10:
warn message uses $%.6f for sub-cent precision.
- call_broker wrapper: wrapped on_delta now branches on
kind == "usage" -> _record_usage(payload.model, payload.category,
payload). R2: keys by payload.model (set inside broker.lua from
model_cfg.model). When fallback fires, broker is called with
fb_cfg, so payload.model IS the fallback's name automatically —
wrapper doesn't track primary-vs-fallback itself.
- 5 caller sites wired with opts.category:
ask_ai call_broker -> category="main"
summarize-on-evict -> category="summarize"
DELEGATE: handler -> category="delegate"
:memory summarize -> category="memory_summarize"
:delegate meta -> category="delegate"
- All 4 broker.chat call sites (every site listed above except
ask_ai, which goes through call_broker) switched from
local reply, err = broker.chat(...)
to
local reply, second = broker.chat(...)
and now branch on whether reply is nil to interpret second (the
error message on failure, the usage payload on success). Captured
usage is routed through _record_usage.
E2E verified against live cloud broker:
- cloud prompt -> reply "Hi! 👋"
- Warn fired: "session cost $0.000219 has crossed warn_at_dollars=$0.000010"
- R10 sub-cent precision visible in both numbers.
Norris + safety paths still untouched — commit #4 wires those.
Regression: test_safety 87/87, test_router_model 31/31, repl loads.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -215,6 +215,14 @@ function M.run(config)
|
|||||||
-- has to exist as a local in scope BEFORE ask_ai is declared.
|
-- has to exist as a local in scope BEFORE ask_ai is declared.
|
||||||
local _bg_spawn
|
local _bg_spawn
|
||||||
|
|
||||||
|
-- Phase 7 forward decl: _record_usage is the central chokepoint
|
||||||
|
-- for ctx:add_usage + warn-threshold check. Defined alongside
|
||||||
|
-- call_broker below, but needs to be in lexical scope of the
|
||||||
|
-- summarize-on-evict closure (which is built up earlier in
|
||||||
|
-- make_summarize_fn). Same forward-declaration pattern as
|
||||||
|
-- _bg_spawn — assign below, reference both early and late.
|
||||||
|
local _record_usage
|
||||||
|
|
||||||
-- Issue #13: secret redaction. Load vault if configured, create a
|
-- Issue #13: secret redaction. Load vault if configured, create a
|
||||||
-- session for this conversation. ctx stores PLAIN; we scrub just
|
-- session for this conversation. ctx stores PLAIN; we scrub just
|
||||||
-- before broker.chat_stream and rehydrate the streamed reply for
|
-- before broker.chat_stream and rehydrate the streamed reply for
|
||||||
@@ -323,12 +331,19 @@ function M.run(config)
|
|||||||
.. "No commentary, no markdown, no bullet lists." },
|
.. "No commentary, no markdown, no bullet lists." },
|
||||||
{ role = "user", content = body },
|
{ role = "user", content = body },
|
||||||
}, secrets_mode_for(sum_cfg))
|
}, secrets_mode_for(sum_cfg))
|
||||||
local reply, err = broker.chat(sum_cfg, sum_msgs,
|
-- Phase 7: broker.chat returns (text, usage) on success or
|
||||||
{ max_tokens = 300, timeout_ms = 30000 })
|
-- (nil, errmsg) on failure. Capture as (text, second); branch
|
||||||
|
-- on text nil-ness to interpret second.
|
||||||
|
local reply, second = broker.chat(sum_cfg, sum_msgs,
|
||||||
|
{ max_tokens = 300, timeout_ms = 30000,
|
||||||
|
category = "summarize" })
|
||||||
if not reply then
|
if not reply then
|
||||||
renderer.status("context summarize failed: " .. tostring(err))
|
renderer.status("context summarize failed: " .. tostring(second))
|
||||||
return nil
|
return nil
|
||||||
end
|
end
|
||||||
|
if second then -- usage payload
|
||||||
|
_record_usage(second.model, second.category, second)
|
||||||
|
end
|
||||||
if secrets_session then
|
if secrets_session then
|
||||||
reply = secrets_session:rehydrate(reply)
|
reply = secrets_session:rehydrate(reply)
|
||||||
end
|
end
|
||||||
@@ -670,15 +685,58 @@ function M.run(config)
|
|||||||
and fallback_reason(err) ~= nil
|
and fallback_reason(err) ~= nil
|
||||||
end
|
end
|
||||||
|
|
||||||
|
-- Phase 7 (R5): central chokepoint for usage recording. Wraps
|
||||||
|
-- ctx:add_usage AND does the warn-threshold check. All callers
|
||||||
|
-- (this file + safety.lua via helpers.on_usage / opts.on_usage)
|
||||||
|
-- route through here so the warn check fires exactly once per
|
||||||
|
-- accumulator update. Keeps context.lua decoupled from renderer.
|
||||||
|
-- R2: caller passes the model name that should be CREDITED — for
|
||||||
|
-- normal calls that's the active model; for fallback retries the
|
||||||
|
-- broker's payload.model (which IS the fallback's model_cfg.model
|
||||||
|
-- per broker emission) handles it correctly.
|
||||||
|
_record_usage = function(model, category, usage)
|
||||||
|
if not usage then return end
|
||||||
|
ctx:add_usage(model, category, usage)
|
||||||
|
if not (config.cost) then return end
|
||||||
|
local cw = ctx.cost_warn_state
|
||||||
|
if config.cost.warn_at_dollars and not cw.dollars then
|
||||||
|
local cost = ctx:total_cost()
|
||||||
|
if cost >= config.cost.warn_at_dollars then
|
||||||
|
renderer.status(("session cost $%.6f has crossed warn_at_dollars=$%.6f")
|
||||||
|
:format(cost, config.cost.warn_at_dollars))
|
||||||
|
cw.dollars = true
|
||||||
|
end
|
||||||
|
end
|
||||||
|
if config.cost.warn_at_tokens and not cw.tokens then
|
||||||
|
local p, c = ctx:total_tokens()
|
||||||
|
if (p + c) >= config.cost.warn_at_tokens then
|
||||||
|
renderer.status(("session tokens %d has crossed warn_at_tokens=%d")
|
||||||
|
:format(p + c, config.cost.warn_at_tokens))
|
||||||
|
cw.tokens = true
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
-- Wrap broker.chat_stream with the Phase 5 fallback-retry path.
|
-- Wrap broker.chat_stream with the Phase 5 fallback-retry path.
|
||||||
-- Retries ONCE against cfg.routing.fallback_model (default "cloud")
|
-- Retries ONCE against cfg.routing.fallback_model (default "cloud")
|
||||||
-- when (a) cfg.routing.fallback is true, (b) err matches a
|
-- when (a) cfg.routing.fallback is true, (b) err matches a
|
||||||
-- fallback-eligible pattern, AND (c) no deltas have arrived yet
|
-- fallback-eligible pattern, AND (c) no deltas have arrived yet
|
||||||
-- (mid-stream failures aren't retried — partial text would be
|
-- (mid-stream failures aren't retried — partial text would be
|
||||||
-- duplicated).
|
-- duplicated).
|
||||||
|
--
|
||||||
|
-- Phase 7 (R2): wrapped on_delta keys usage by payload.model
|
||||||
|
-- (set inside broker.lua from model_cfg.model — the
|
||||||
|
-- CALLER-INTENDED model name). When fallback fires, the broker
|
||||||
|
-- is called with fb_cfg, so payload.model is naturally the
|
||||||
|
-- fallback's model name — wrapper doesn't need to track
|
||||||
|
-- primary-vs-fallback itself.
|
||||||
local function call_broker(model_cfg, model_name, msgs, on_delta, opts)
|
local function call_broker(model_cfg, model_name, msgs, on_delta, opts)
|
||||||
local any_delta = false
|
local any_delta = false
|
||||||
local wrapped = function(kind, payload)
|
local wrapped = function(kind, payload)
|
||||||
|
if kind == "usage" then
|
||||||
|
_record_usage(payload.model, payload.category, payload)
|
||||||
|
return -- usage isn't forwarded to the underlying on_delta
|
||||||
|
end
|
||||||
any_delta = true
|
any_delta = true
|
||||||
return on_delta(kind, payload)
|
return on_delta(kind, payload)
|
||||||
end
|
end
|
||||||
@@ -939,7 +997,7 @@ function M.run(config)
|
|||||||
tool_calls_seen[#tool_calls_seen + 1] = payload
|
tool_calls_seen[#tool_calls_seen + 1] = payload
|
||||||
end
|
end
|
||||||
end,
|
end,
|
||||||
{ tools = tools_schema() })
|
{ tools = tools_schema(), category = "main" })
|
||||||
if rehydrator then
|
if rehydrator then
|
||||||
local tail = rehydrator:flush()
|
local tail = rehydrator:flush()
|
||||||
if tail ~= "" then
|
if tail ~= "" then
|
||||||
@@ -1101,12 +1159,17 @@ function M.run(config)
|
|||||||
local sub_msgs = scrub_messages(
|
local sub_msgs = scrub_messages(
|
||||||
{ { role = "user", content = d.prompt } },
|
{ { role = "user", content = d.prompt } },
|
||||||
secrets_mode_for(sub_cfg))
|
secrets_mode_for(sub_cfg))
|
||||||
local sub_text, sub_err = broker.chat(sub_cfg, sub_msgs)
|
-- Phase 7: capture (text, usage) — second is err on failure.
|
||||||
|
local sub_text, second = broker.chat(sub_cfg, sub_msgs,
|
||||||
|
{ category = "delegate" })
|
||||||
if not sub_text then
|
if not sub_text then
|
||||||
renderer.status(("delegate %s failed: %s"):format(d.preset, tostring(sub_err)))
|
renderer.status(("delegate %s failed: %s"):format(d.preset, tostring(second)))
|
||||||
ctx:append_exec_output(
|
ctx:append_exec_output(
|
||||||
("[delegate %s failed: %s]"):format(d.preset, tostring(sub_err)))
|
("[delegate %s failed: %s]"):format(d.preset, tostring(second)))
|
||||||
else
|
else
|
||||||
|
if second then -- usage payload
|
||||||
|
_record_usage(second.model, second.category, second)
|
||||||
|
end
|
||||||
-- Rehydrate the reply so the model sees its own
|
-- Rehydrate the reply so the model sees its own
|
||||||
-- secrets restored when this gets re-serialized
|
-- secrets restored when this gets re-serialized
|
||||||
-- on the next ask_ai turn.
|
-- on the next ask_ai turn.
|
||||||
@@ -1584,13 +1647,18 @@ function M.run(config)
|
|||||||
},
|
},
|
||||||
{ role = "user", content = transcript },
|
{ role = "user", content = transcript },
|
||||||
}, secrets_mode_for(sum_cfg))
|
}, secrets_mode_for(sum_cfg))
|
||||||
local reply, err = broker.chat(sum_cfg, sum_msgs,
|
-- Phase 7: capture (text, usage); second is err on failure.
|
||||||
{ max_tokens = 1024, timeout_ms = 90000 })
|
local reply, second = broker.chat(sum_cfg, sum_msgs,
|
||||||
|
{ max_tokens = 1024, timeout_ms = 90000,
|
||||||
|
category = "memory_summarize" })
|
||||||
|
|
||||||
if not reply then
|
if not reply then
|
||||||
renderer.status("summarize failed: " .. tostring(err))
|
renderer.status("summarize failed: " .. tostring(second))
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
|
if second then -- usage payload
|
||||||
|
_record_usage(second.model, second.category, second)
|
||||||
|
end
|
||||||
if secrets_session then
|
if secrets_session then
|
||||||
reply = secrets_session:rehydrate(reply)
|
reply = secrets_session:rehydrate(reply)
|
||||||
end
|
end
|
||||||
@@ -2153,10 +2221,15 @@ function M.run(config)
|
|||||||
local sub_msgs = scrub_messages(
|
local sub_msgs = scrub_messages(
|
||||||
{ { role = "user", content = prompt } },
|
{ { role = "user", content = prompt } },
|
||||||
secrets_mode_for(sub_cfg))
|
secrets_mode_for(sub_cfg))
|
||||||
local sub_text, sub_err = broker.chat(sub_cfg, sub_msgs)
|
-- Phase 7: capture (text, usage); second is err on failure.
|
||||||
|
local sub_text, second = broker.chat(sub_cfg, sub_msgs,
|
||||||
|
{ category = "delegate" })
|
||||||
if not sub_text then
|
if not sub_text then
|
||||||
renderer.status(("delegate %s failed: %s"):format(preset, tostring(sub_err)))
|
renderer.status(("delegate %s failed: %s"):format(preset, tostring(second)))
|
||||||
else
|
else
|
||||||
|
if second then -- usage payload
|
||||||
|
_record_usage(second.model, second.category, second)
|
||||||
|
end
|
||||||
if secrets_session then
|
if secrets_session then
|
||||||
sub_text = secrets_session:rehydrate(sub_text)
|
sub_text = secrets_session:rehydrate(sub_text)
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user