repl: _record_usage helper + opts.category at 5 sites (Phase 7 commit #3)

Wires broker.lua's on_delta("usage", payload) and broker.chat's
(text, usage) return to the ctx accumulator via a single chokepoint.

Changes:

  - Forward-declare `local _record_usage` next to _bg_spawn, following
    the same pattern: the summarize-on-evict closure in
    make_summarize_fn (built at line 299) needs lexical access to
    _record_usage (assigned at line 695), so the name is declared
    early and assigned later without `local`.

  - _record_usage(model, category, usage) — R5 central chokepoint:
    routes to ctx:add_usage, then checks the per-threshold warn
    state. R4: cost_warn_state has two independent flags (dollars
    and tokens) so first-to-fire doesn't suppress the other. R10:
    warn message uses $%.6f for sub-cent precision.

  - call_broker wrapper: wrapped on_delta now branches on
    kind == "usage" -> _record_usage(payload.model, payload.category,
    payload). R2: keys by payload.model (set inside broker.lua from
    model_cfg.model). When fallback fires, broker is called with
    fb_cfg, so payload.model IS the fallback's name automatically —
    wrapper doesn't track primary-vs-fallback itself.

  - 5 caller sites wired with opts.category:
      ask_ai call_broker             -> category="main"
      summarize-on-evict             -> category="summarize"
      DELEGATE: handler              -> category="delegate"
      :memory summarize              -> category="memory_summarize"
      :delegate meta                 -> category="delegate"

  - All 4 broker.chat call sites switched from
      local reply, err = broker.chat(...)
    to
      local reply, second = broker.chat(...)
    branching on reply nil-ness to interpret second (err on failure,
    usage on success). Captured usage routes through _record_usage.

E2E verified against live cloud broker:
  - cloud prompt -> reply "Hi! 👋"
  - Warn fired: "session cost $0.000219 has crossed warn_at_dollars=$0.000010"
  - R10 sub-cent precision visible in both numbers.

Norris + safety paths still untouched — commit #4 wires those.

Regression: test_safety 87/87, test_router_model 31/31, repl loads.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-16 23:00:06 +00:00
parent 7b4a9becc2
commit 8adebd52cc
+85 -12
View File
@@ -215,6 +215,14 @@ function M.run(config)
-- has to exist as a local in scope BEFORE ask_ai is declared.
local _bg_spawn local _bg_spawn
-- Phase 7 forward decl: _record_usage is the central chokepoint
-- for ctx:add_usage + warn-threshold check. Defined alongside
-- call_broker below, but needs to be in lexical scope of the
-- summarize-on-evict closure (which is built up earlier in
-- make_summarize_fn). Same forward-declaration pattern as
-- _bg_spawn — assign below, reference both early and late.
local _record_usage
-- Issue #13: secret redaction. Load vault if configured, create a
-- session for this conversation. ctx stores PLAIN; we scrub just
-- before broker.chat_stream and rehydrate the streamed reply for
@@ -323,12 +331,19 @@ function M.run(config)
.. "No commentary, no markdown, no bullet lists." }, .. "No commentary, no markdown, no bullet lists." },
{ role = "user", content = body }, { role = "user", content = body },
}, secrets_mode_for(sum_cfg)) }, secrets_mode_for(sum_cfg))
local reply, err = broker.chat(sum_cfg, sum_msgs, -- Phase 7: broker.chat returns (text, usage) on success or
{ max_tokens = 300, timeout_ms = 30000 }) -- (nil, errmsg) on failure. Capture as (text, second); branch
-- on text nil-ness to interpret second.
local reply, second = broker.chat(sum_cfg, sum_msgs,
{ max_tokens = 300, timeout_ms = 30000,
category = "summarize" })
if not reply then if not reply then
renderer.status("context summarize failed: " .. tostring(err)) renderer.status("context summarize failed: " .. tostring(second))
return nil return nil
end end
if second then -- usage payload
_record_usage(second.model, second.category, second)
end
if secrets_session then if secrets_session then
reply = secrets_session:rehydrate(reply) reply = secrets_session:rehydrate(reply)
end end
@@ -670,15 +685,58 @@ function M.run(config)
and fallback_reason(err) ~= nil and fallback_reason(err) ~= nil
end end
-- Phase 7 (R5): single entry point for usage accounting.
--
-- Forwards the usage payload to ctx:add_usage and then evaluates the
-- two warn thresholds from config.cost, reporting through
-- renderer.status so the accumulator itself never has to call the
-- renderer.
--
-- Parameters:
--   model    - model name the usage is credited to (R2: callers pass
--              the payload's own model field).
--   category - usage category label supplied by the caller.
--   usage    - usage payload; a nil/false value makes the call a no-op.
--
-- Threshold handling (R4): ctx.cost_warn_state carries one flag per
-- threshold (dollars, tokens). A flag is set right after its warning
-- is emitted and is checked before the next evaluation, so one
-- threshold firing does not silence the other. Thresholds are only
-- evaluated when config.cost is present.
_record_usage = function(model, category, usage)
    -- Nothing to record.
    if not usage then
        return
    end

    ctx:add_usage(model, category, usage)

    local limits = config.cost
    if not limits then
        return
    end

    local fired = ctx.cost_warn_state

    -- Dollar threshold: the total is only computed while the warning
    -- is configured and has not fired yet.
    local dollar_limit = limits.warn_at_dollars
    if dollar_limit and not fired.dollars then
        local spent = ctx:total_cost()
        if spent >= dollar_limit then
            -- R10: six decimals keep sub-cent amounts visible.
            local msg = ("session cost $%.6f has crossed warn_at_dollars=$%.6f")
                :format(spent, dollar_limit)
            renderer.status(msg)
            fired.dollars = true
        end
    end

    -- Token threshold: compares the sum of both counts returned by
    -- ctx:total_tokens() against the configured limit.
    local token_limit = limits.warn_at_tokens
    if token_limit and not fired.tokens then
        local p_count, c_count = ctx:total_tokens()
        local used = p_count + c_count
        if used >= token_limit then
            local msg = ("session tokens %d has crossed warn_at_tokens=%d")
                :format(used, token_limit)
            renderer.status(msg)
            fired.tokens = true
        end
    end
end
-- Wrap broker.chat_stream with the Phase 5 fallback-retry path.
-- Retries ONCE against cfg.routing.fallback_model (default "cloud")
-- when (a) cfg.routing.fallback is true, (b) err matches a
-- fallback-eligible pattern, AND (c) no deltas have arrived yet
-- (mid-stream failures aren't retried — partial text would be
-- duplicated).
--
-- Phase 7 (R2): wrapped on_delta keys usage by payload.model
-- (set inside broker.lua from model_cfg.model — the
-- CALLER-INTENDED model name). When fallback fires, the broker
-- is called with fb_cfg, so payload.model is naturally the
-- fallback's model name — wrapper doesn't need to track
-- primary-vs-fallback itself.
local function call_broker(model_cfg, model_name, msgs, on_delta, opts) local function call_broker(model_cfg, model_name, msgs, on_delta, opts)
local any_delta = false local any_delta = false
local wrapped = function(kind, payload) local wrapped = function(kind, payload)
if kind == "usage" then
_record_usage(payload.model, payload.category, payload)
return -- usage isn't forwarded to the underlying on_delta
end
any_delta = true any_delta = true
return on_delta(kind, payload) return on_delta(kind, payload)
end end
@@ -939,7 +997,7 @@ function M.run(config)
tool_calls_seen[#tool_calls_seen + 1] = payload tool_calls_seen[#tool_calls_seen + 1] = payload
end end
end, end,
{ tools = tools_schema() }) { tools = tools_schema(), category = "main" })
if rehydrator then if rehydrator then
local tail = rehydrator:flush() local tail = rehydrator:flush()
if tail ~= "" then if tail ~= "" then
@@ -1101,12 +1159,17 @@ function M.run(config)
local sub_msgs = scrub_messages( local sub_msgs = scrub_messages(
{ { role = "user", content = d.prompt } }, { { role = "user", content = d.prompt } },
secrets_mode_for(sub_cfg)) secrets_mode_for(sub_cfg))
local sub_text, sub_err = broker.chat(sub_cfg, sub_msgs) -- Phase 7: capture (text, usage) — second is err on failure.
local sub_text, second = broker.chat(sub_cfg, sub_msgs,
{ category = "delegate" })
if not sub_text then if not sub_text then
renderer.status(("delegate %s failed: %s"):format(d.preset, tostring(sub_err))) renderer.status(("delegate %s failed: %s"):format(d.preset, tostring(second)))
ctx:append_exec_output( ctx:append_exec_output(
("[delegate %s failed: %s]"):format(d.preset, tostring(sub_err))) ("[delegate %s failed: %s]"):format(d.preset, tostring(second)))
else else
if second then -- usage payload
_record_usage(second.model, second.category, second)
end
-- Rehydrate the reply so the model sees its own -- Rehydrate the reply so the model sees its own
-- secrets restored when this gets re-serialized -- secrets restored when this gets re-serialized
-- on the next ask_ai turn. -- on the next ask_ai turn.
@@ -1584,13 +1647,18 @@ function M.run(config)
}, },
{ role = "user", content = transcript }, { role = "user", content = transcript },
}, secrets_mode_for(sum_cfg)) }, secrets_mode_for(sum_cfg))
local reply, err = broker.chat(sum_cfg, sum_msgs, -- Phase 7: capture (text, usage); second is err on failure.
{ max_tokens = 1024, timeout_ms = 90000 }) local reply, second = broker.chat(sum_cfg, sum_msgs,
{ max_tokens = 1024, timeout_ms = 90000,
category = "memory_summarize" })
if not reply then if not reply then
renderer.status("summarize failed: " .. tostring(err)) renderer.status("summarize failed: " .. tostring(second))
return return
end end
if second then -- usage payload
_record_usage(second.model, second.category, second)
end
if secrets_session then if secrets_session then
reply = secrets_session:rehydrate(reply) reply = secrets_session:rehydrate(reply)
end end
@@ -2153,10 +2221,15 @@ function M.run(config)
local sub_msgs = scrub_messages( local sub_msgs = scrub_messages(
{ { role = "user", content = prompt } }, { { role = "user", content = prompt } },
secrets_mode_for(sub_cfg)) secrets_mode_for(sub_cfg))
local sub_text, sub_err = broker.chat(sub_cfg, sub_msgs) -- Phase 7: capture (text, usage); second is err on failure.
local sub_text, second = broker.chat(sub_cfg, sub_msgs,
{ category = "delegate" })
if not sub_text then if not sub_text then
renderer.status(("delegate %s failed: %s"):format(preset, tostring(sub_err))) renderer.status(("delegate %s failed: %s"):format(preset, tostring(second)))
else else
if second then -- usage payload
_record_usage(second.model, second.category, second)
end
if secrets_session then if secrets_session then
sub_text = secrets_session:rehydrate(sub_text) sub_text = secrets_session:rehydrate(sub_text)
end end