repl: _record_usage helper + opts.category at 5 sites (Phase 7 commit #3)

Wires broker.lua's on_delta("usage", payload) and broker.chat's (text, usage) return to the ctx accumulator via a single chokepoint. Changes: - Forward decl `local _record_usage` near _bg_spawn — same pattern; the summarize-on-evict closure in make_summarize_fn (built at line 299) needs lexical access to _record_usage (assigned at line 695), so forward-declare and assign-without-`local`. - _record_usage(model, category, usage) — R5 central chokepoint: routes to ctx:add_usage, then checks the per-threshold warn state. R4: cost_warn_state has two independent flags (dollars and tokens) so first-to-fire doesn't suppress the other. R10: warn message uses $%.6f for sub-cent precision. - call_broker wrapper: wrapped on_delta now branches on kind == "usage" -> _record_usage(payload.model, payload.category, payload). R2: keys by payload.model (set inside broker.lua from model_cfg.model). When fallback fires, broker is called with fb_cfg, so payload.model IS the fallback's name automatically — wrapper doesn't track primary-vs-fallback itself. - 5 caller sites wired with opts.category: ask_ai call_broker -> category="main" summarize-on-evict -> category="summarize" DELEGATE: handler -> category="delegate" :memory summarize -> category="memory_summarize" :delegate meta -> category="delegate" - All 4 broker.chat call sites switched from local reply, err = broker.chat(...) to local reply, second = broker.chat(...) branching on reply nil-ness to interpret second (err on failure, usage on success). Captured usage routes through _record_usage. E2E verified against live cloud broker: - cloud prompt -> reply "Hi! 👋" - Warn fired: "session cost $0.000219 has crossed warn_at_dollars=$0.000010" - R10 sub-cent precision visible in both numbers. Norris + safety paths still untouched — commit #4 wires those. Regression: test_safety 87/87, test_router_model 31/31, repl loads. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-16 23:00:06 +00:00
parent 7b4a9becc2
commit 8adebd52cc
1 changed files with 85 additions and 12 deletions
@@ -215,6 +215,14 @@ function M.run(config)
    -- has to exist as a local in scope BEFORE ask_ai is declared.
    local _bg_spawn
    -- Phase 7 forward decl: _record_usage is the central chokepoint
    -- for ctx:add_usage + warn-threshold check. Defined alongside
    -- call_broker below, but needs to be in lexical scope of the
    -- summarize-on-evict closure (which is built up earlier in
    -- make_summarize_fn). Same forward-declaration pattern as
    -- _bg_spawn — assign below, reference both early and late.
    local _record_usage
    -- Issue #13: secret redaction. Load vault if configured, create a
    -- session for this conversation. ctx stores PLAIN; we scrub just
    -- before broker.chat_stream and rehydrate the streamed reply for
@@ -323,12 +331,19 @@ function M.run(config)
                    .. "No commentary, no markdown, no bullet lists." },
                { role = "user", content = body },
            }, secrets_mode_for(sum_cfg))
-            local reply, err = broker.chat(sum_cfg, sum_msgs,
+            -- Phase 7: broker.chat returns (text, usage) on success or
-                { max_tokens = 300, timeout_ms = 30000 })
+            -- (nil, errmsg) on failure. Capture as (text, second); branch
            -- on text nil-ness to interpret second.
            local reply, second = broker.chat(sum_cfg, sum_msgs,
                { max_tokens = 300, timeout_ms = 30000,
                  category = "summarize" })
            if not reply then
-                renderer.status("context summarize failed: " .. tostring(err))
+                renderer.status("context summarize failed: " .. tostring(second))
                return nil
            end
            if second then  -- usage payload
                _record_usage(second.model, second.category, second)
            end
            if secrets_session then
                reply = secrets_session:rehydrate(reply)
            end
@@ -670,15 +685,58 @@ function M.run(config)
               and fallback_reason(err) ~= nil
    end
    -- Phase 7 (R5): central chokepoint for usage recording. Wraps
    -- ctx:add_usage AND does the warn-threshold check. All callers
    -- (this file + safety.lua via helpers.on_usage / opts.on_usage)
    -- route through here so the warn check fires exactly once per
    -- accumulator update. Keeps context.lua decoupled from renderer.
    -- R2: caller passes the model name that should be CREDITED — for
    -- normal calls that's the active model; for fallback retries the
    -- broker's payload.model (which IS the fallback's model_cfg.model
    -- per broker emission) handles it correctly.
    _record_usage = function(model, category, usage)
        -- Without a usage payload there is nothing to accumulate and
        -- therefore nothing new to compare against the thresholds.
        if not usage then return end
        ctx:add_usage(model, category, usage)

        -- Threshold warnings only apply when a cost section is configured.
        local limits = config.cost
        if not limits then return end

        -- One flag per threshold: each warning is emitted at most once,
        -- and the dollar warning never suppresses the token warning
        -- (or vice versa).
        local fired = ctx.cost_warn_state

        local dollar_limit = limits.warn_at_dollars
        if dollar_limit and not fired.dollars then
            local spent = ctx:total_cost()
            if spent >= dollar_limit then
                -- %.6f keeps sub-cent amounts visible in the message.
                local msg = ("session cost $%.6f has crossed warn_at_dollars=$%.6f")
                            :format(spent, dollar_limit)
                renderer.status(msg)
                fired.dollars = true
            end
        end

        local token_limit = limits.warn_at_tokens
        if token_limit and not fired.tokens then
            -- total_tokens() yields two counts; the threshold applies
            -- to their sum.
            local count_a, count_b = ctx:total_tokens()
            local total = count_a + count_b
            if total >= token_limit then
                local msg = ("session tokens %d has crossed warn_at_tokens=%d")
                            :format(total, token_limit)
                renderer.status(msg)
                fired.tokens = true
            end
        end
    end
    -- Wrap broker.chat_stream with the Phase 5 fallback-retry path.
    -- Retries ONCE against cfg.routing.fallback_model (default "cloud")
    -- when (a) cfg.routing.fallback is true, (b) err matches a
    -- fallback-eligible pattern, AND (c) no deltas have arrived yet
    -- (mid-stream failures aren't retried — partial text would be
    -- duplicated).
    --
    -- Phase 7 (R2): wrapped on_delta keys usage by payload.model
    -- (set inside broker.lua from model_cfg.model — the
    -- CALLER-INTENDED model name). When fallback fires, the broker
    -- is called with fb_cfg, so payload.model is naturally the
    -- fallback's model name — wrapper doesn't need to track
    -- primary-vs-fallback itself.
    local function call_broker(model_cfg, model_name, msgs, on_delta, opts)
        local any_delta = false
        local wrapped = function(kind, payload)
            -- Every non-usage event counts as a delta: flag it and pass
            -- it straight through to the caller-supplied on_delta.
            if kind ~= "usage" then
                any_delta = true
                return on_delta(kind, payload)
            end
            -- Usage events are consumed here: they are credited to the
            -- model/category named in the payload, are not forwarded to
            -- on_delta, and do not set any_delta.
            _record_usage(payload.model, payload.category, payload)
        end
@@ -939,7 +997,7 @@ function M.run(config)
                        tool_calls_seen[#tool_calls_seen + 1] = payload
                    end
                end,
-                { tools = tools_schema() })
+                { tools = tools_schema(), category = "main" })
            if rehydrator then
                local tail = rehydrator:flush()
                if tail ~= "" then
@@ -1101,12 +1159,17 @@ function M.run(config)
                local sub_msgs = scrub_messages(
                    { { role = "user", content = d.prompt } },
                    secrets_mode_for(sub_cfg))
-                local sub_text, sub_err = broker.chat(sub_cfg, sub_msgs)
+                -- Phase 7: capture (text, usage) — second is err on failure.
                local sub_text, second = broker.chat(sub_cfg, sub_msgs,
                    { category = "delegate" })
                if not sub_text then
-                    renderer.status(("delegate %s failed: %s"):format(d.preset, tostring(sub_err)))
+                    renderer.status(("delegate %s failed: %s"):format(d.preset, tostring(second)))
                    ctx:append_exec_output(
-                        ("[delegate %s failed: %s]"):format(d.preset, tostring(sub_err)))
+                        ("[delegate %s failed: %s]"):format(d.preset, tostring(second)))
                else
                    if second then  -- usage payload
                        _record_usage(second.model, second.category, second)
                    end
                    -- Rehydrate the reply so the model sees its own
                    -- secrets restored when this gets re-serialized
                    -- on the next ask_ai turn.
@@ -1584,13 +1647,18 @@ function M.run(config)
                    },
                    { role = "user", content = transcript },
                }, secrets_mode_for(sum_cfg))
-                local reply, err = broker.chat(sum_cfg, sum_msgs,
+                -- Phase 7: capture (text, usage); second is err on failure.
-                                               { max_tokens = 1024, timeout_ms = 90000 })
+                local reply, second = broker.chat(sum_cfg, sum_msgs,
                    { max_tokens = 1024, timeout_ms = 90000,
                      category = "memory_summarize" })
                if not reply then
-                    renderer.status("summarize failed: " .. tostring(err))
+                    renderer.status("summarize failed: " .. tostring(second))
                    return
                end
                if second then  -- usage payload
                    _record_usage(second.model, second.category, second)
                end
                if secrets_session then
                    reply = secrets_session:rehydrate(reply)
                end
@@ -2153,10 +2221,15 @@ function M.run(config)
        local sub_msgs = scrub_messages(
            { { role = "user", content = prompt } },
            secrets_mode_for(sub_cfg))
-        local sub_text, sub_err = broker.chat(sub_cfg, sub_msgs)
+        -- Phase 7: capture (text, usage); second is err on failure.
        local sub_text, second = broker.chat(sub_cfg, sub_msgs,
            { category = "delegate" })
        if not sub_text then
-            renderer.status(("delegate %s failed: %s"):format(preset, tostring(sub_err)))
+            renderer.status(("delegate %s failed: %s"):format(preset, tostring(second)))
        else
            if second then  -- usage payload
                _record_usage(second.model, second.category, second)
            end
            if secrets_session then
                sub_text = secrets_session:rehydrate(sub_text)
            end