repl: _record_usage helper + opts.category at 5 sites (Phase 7 commit #3)

Wires broker.lua's on_delta("usage", payload) and broker.chat's
(text, usage) return to the ctx accumulator via a single chokepoint.

Changes:

  - Forward decl `local _record_usage` near _bg_spawn — same pattern;
    the summarize-on-evict closure in make_summarize_fn (built at
    line 299) needs lexical access to _record_usage (assigned at
    line 695), so forward-declare and assign-without-`local`.

  - _record_usage(model, category, usage) — R5 central chokepoint:
    routes to ctx:add_usage, then checks the per-threshold warn
    state. R4: cost_warn_state has two independent flags (dollars
    and tokens) so first-to-fire doesn't suppress the other. R10:
    warn message uses $%.6f for sub-cent precision.

  - call_broker wrapper: wrapped on_delta now branches on
    kind == "usage" -> _record_usage(payload.model, payload.category,
    payload). R2: keys by payload.model (set inside broker.lua from
    model_cfg.model). When fallback fires, broker is called with
    fb_cfg, so payload.model IS the fallback's name automatically —
    wrapper doesn't track primary-vs-fallback itself.

  - 5 caller sites wired with opts.category:
      ask_ai call_broker             -> category="main"
      summarize-on-evict             -> category="summarize"
      DELEGATE: handler              -> category="delegate"
      :memory summarize              -> category="memory_summarize"
      :delegate meta                 -> category="delegate"

  - All 4 broker.chat call sites switched from
      local reply, err = broker.chat(...)
    to
      local reply, second = broker.chat(...)
    branching on reply nil-ness to interpret second (err on failure,
    usage on success). Captured usage routes through _record_usage.

E2E verified against live cloud broker:
  - cloud prompt -> reply "Hi! 👋"
  - Warn fired: "session cost $0.000219 has crossed warn_at_dollars=$0.000010"
  - R10 sub-cent precision visible in both numbers.

Norris + safety paths still untouched — commit #4 wires those.

Regression: test_safety 87/87, test_router_model 31/31, repl loads.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-16 23:00:06 +00:00
parent 7b4a9becc2
commit 8adebd52cc
+85 -12
View File
@@ -215,6 +215,14 @@ function M.run(config)
-- has to exist as a local in scope BEFORE ask_ai is declared.
local _bg_spawn
-- Phase 7 forward decl: _record_usage is the central chokepoint
-- for ctx:add_usage + warn-threshold check. Defined alongside
-- call_broker below, but needs to be in lexical scope of the
-- summarize-on-evict closure (which is built up earlier in
-- make_summarize_fn). Same forward-declaration pattern as
-- _bg_spawn — assign below, reference both early and late.
local _record_usage
-- Issue #13: secret redaction. Load vault if configured, create a
-- session for this conversation. ctx stores PLAIN; we scrub just
-- before broker.chat_stream and rehydrate the streamed reply for
@@ -323,12 +331,19 @@ function M.run(config)
.. "No commentary, no markdown, no bullet lists." },
{ role = "user", content = body },
}, secrets_mode_for(sum_cfg))
local reply, err = broker.chat(sum_cfg, sum_msgs,
{ max_tokens = 300, timeout_ms = 30000 })
-- Phase 7: broker.chat returns (text, usage) on success or
-- (nil, errmsg) on failure. Capture as (text, second); branch
-- on text nil-ness to interpret second.
local reply, second = broker.chat(sum_cfg, sum_msgs,
{ max_tokens = 300, timeout_ms = 30000,
category = "summarize" })
if not reply then
renderer.status("context summarize failed: " .. tostring(err))
renderer.status("context summarize failed: " .. tostring(second))
return nil
end
if second then -- usage payload
_record_usage(second.model, second.category, second)
end
if secrets_session then
reply = secrets_session:rehydrate(reply)
end
@@ -670,15 +685,58 @@ function M.run(config)
and fallback_reason(err) ~= nil
end
-- Phase 7 (R5): central chokepoint for usage recording. Wraps
-- ctx:add_usage AND does the warn-threshold check. All callers
-- (this file + safety.lua via helpers.on_usage / opts.on_usage)
-- route through here so the warn check fires exactly once per
-- accumulator update. Keeps context.lua decoupled from renderer.
-- R2: caller passes the model name that should be CREDITED — for
-- normal calls that's the active model; for fallback retries the
-- broker's payload.model (which IS the fallback's model_cfg.model
-- per broker emission) handles it correctly.
_record_usage = function(model, category, usage)
    -- No usage payload: nothing to accumulate and nothing to check.
    if not usage then
        return
    end

    -- Accumulate first so the threshold checks below see the totals
    -- that include this call.
    ctx:add_usage(model, category, usage)

    -- Warn thresholds are optional; without a cost section we are done.
    local cost_cfg = config.cost
    if not cost_cfg then
        return
    end

    -- One-shot flags: once a threshold has warned, its flag is set and
    -- that warning is not emitted again. The dollar and token flags are
    -- independent, so one firing never suppresses the other.
    local warned = ctx.cost_warn_state

    local dollar_limit = cost_cfg.warn_at_dollars
    if dollar_limit and not warned.dollars then
        local spent = ctx:total_cost()
        if spent >= dollar_limit then
            -- %.6f keeps sub-cent amounts visible in the message.
            local msg = ("session cost $%.6f has crossed warn_at_dollars=$%.6f")
                :format(spent, dollar_limit)
            renderer.status(msg)
            warned.dollars = true
        end
    end

    local token_limit = cost_cfg.warn_at_tokens
    if token_limit and not warned.tokens then
        -- total_tokens() yields two counts that are summed here
        -- (presumably prompt and completion tokens — confirm against
        -- the ctx implementation).
        local p_tokens, c_tokens = ctx:total_tokens()
        local combined = p_tokens + c_tokens
        if combined >= token_limit then
            local msg = ("session tokens %d has crossed warn_at_tokens=%d")
                :format(combined, token_limit)
            renderer.status(msg)
            warned.tokens = true
        end
    end
end
-- Wrap broker.chat_stream with the Phase 5 fallback-retry path.
-- Retries ONCE against cfg.routing.fallback_model (default "cloud")
-- when (a) cfg.routing.fallback is true, (b) err matches a
-- fallback-eligible pattern, AND (c) no deltas have arrived yet
-- (mid-stream failures aren't retried — partial text would be
-- duplicated).
--
-- Phase 7 (R2): wrapped on_delta keys usage by payload.model
-- (set inside broker.lua from model_cfg.model — the
-- CALLER-INTENDED model name). When fallback fires, the broker
-- is called with fb_cfg, so payload.model is naturally the
-- fallback's model name — wrapper doesn't need to track
-- primary-vs-fallback itself.
local function call_broker(model_cfg, model_name, msgs, on_delta, opts)
local any_delta = false
local wrapped = function(kind, payload)
if kind == "usage" then
_record_usage(payload.model, payload.category, payload)
return -- usage isn't forwarded to the underlying on_delta
end
any_delta = true
return on_delta(kind, payload)
end
@@ -939,7 +997,7 @@ function M.run(config)
tool_calls_seen[#tool_calls_seen + 1] = payload
end
end,
{ tools = tools_schema() })
{ tools = tools_schema(), category = "main" })
if rehydrator then
local tail = rehydrator:flush()
if tail ~= "" then
@@ -1101,12 +1159,17 @@ function M.run(config)
local sub_msgs = scrub_messages(
{ { role = "user", content = d.prompt } },
secrets_mode_for(sub_cfg))
local sub_text, sub_err = broker.chat(sub_cfg, sub_msgs)
-- Phase 7: capture (text, usage) — second is err on failure.
local sub_text, second = broker.chat(sub_cfg, sub_msgs,
{ category = "delegate" })
if not sub_text then
renderer.status(("delegate %s failed: %s"):format(d.preset, tostring(sub_err)))
renderer.status(("delegate %s failed: %s"):format(d.preset, tostring(second)))
ctx:append_exec_output(
("[delegate %s failed: %s]"):format(d.preset, tostring(sub_err)))
("[delegate %s failed: %s]"):format(d.preset, tostring(second)))
else
if second then -- usage payload
_record_usage(second.model, second.category, second)
end
-- Rehydrate the reply so the model sees its own
-- secrets restored when this gets re-serialized
-- on the next ask_ai turn.
@@ -1584,13 +1647,18 @@ function M.run(config)
},
{ role = "user", content = transcript },
}, secrets_mode_for(sum_cfg))
local reply, err = broker.chat(sum_cfg, sum_msgs,
{ max_tokens = 1024, timeout_ms = 90000 })
-- Phase 7: capture (text, usage); second is err on failure.
local reply, second = broker.chat(sum_cfg, sum_msgs,
{ max_tokens = 1024, timeout_ms = 90000,
category = "memory_summarize" })
if not reply then
renderer.status("summarize failed: " .. tostring(err))
renderer.status("summarize failed: " .. tostring(second))
return
end
if second then -- usage payload
_record_usage(second.model, second.category, second)
end
if secrets_session then
reply = secrets_session:rehydrate(reply)
end
@@ -2153,10 +2221,15 @@ function M.run(config)
local sub_msgs = scrub_messages(
{ { role = "user", content = prompt } },
secrets_mode_for(sub_cfg))
local sub_text, sub_err = broker.chat(sub_cfg, sub_msgs)
-- Phase 7: capture (text, usage); second is err on failure.
local sub_text, second = broker.chat(sub_cfg, sub_msgs,
{ category = "delegate" })
if not sub_text then
renderer.status(("delegate %s failed: %s"):format(preset, tostring(sub_err)))
renderer.status(("delegate %s failed: %s"):format(preset, tostring(second)))
else
if second then -- usage payload
_record_usage(second.model, second.category, second)
end
if secrets_session then
sub_text = secrets_session:rehydrate(sub_text)
end