diff --git a/repl.lua b/repl.lua index acc4f4d..23508f8 100644 --- a/repl.lua +++ b/repl.lua @@ -215,6 +215,14 @@ function M.run(config) -- has to exist as a local in scope BEFORE ask_ai is declared. local _bg_spawn + -- Phase 7 forward decl: _record_usage is the central chokepoint + -- for ctx:add_usage + warn-threshold check. Defined alongside + -- call_broker below, but needs to be in lexical scope of the + -- summarize-on-evict closure (which is built up earlier in + -- make_summarize_fn). Same forward-declaration pattern as + -- _bg_spawn — assign below, reference both early and late. + local _record_usage + -- Issue #13: secret redaction. Load vault if configured, create a -- session for this conversation. ctx stores PLAIN; we scrub just -- before broker.chat_stream and rehydrate the streamed reply for @@ -323,12 +331,19 @@ function M.run(config) .. "No commentary, no markdown, no bullet lists." }, { role = "user", content = body }, }, secrets_mode_for(sum_cfg)) - local reply, err = broker.chat(sum_cfg, sum_msgs, - { max_tokens = 300, timeout_ms = 30000 }) + -- Phase 7: broker.chat returns (text, usage) on success or + -- (nil, errmsg) on failure. Capture as (text, second); branch + -- on text nil-ness to interpret second. + local reply, second = broker.chat(sum_cfg, sum_msgs, + { max_tokens = 300, timeout_ms = 30000, + category = "summarize" }) if not reply then - renderer.status("context summarize failed: " .. tostring(err)) + renderer.status("context summarize failed: " .. tostring(second)) return nil end + if second then -- usage payload + _record_usage(second.model, second.category, second) + end if secrets_session then reply = secrets_session:rehydrate(reply) end @@ -670,15 +685,58 @@ function M.run(config) and fallback_reason(err) ~= nil end + -- Phase 7 (R5): central chokepoint for usage recording. Wraps + -- ctx:add_usage AND does the warn-threshold check. All callers + -- (this file + safety.lua via helpers.on_usage / opts.on_usage) + -- route through here so the warn check fires exactly once per + -- accumulator update. Keeps context.lua decoupled from renderer. + -- R2: caller passes the model name that should be CREDITED — for + -- normal calls that's the active model; for fallback retries the + -- broker's payload.model (which IS the fallback's model_cfg.model + -- per broker emission) handles it correctly. + _record_usage = function(model, category, usage) + if not usage then return end + ctx:add_usage(model, category, usage) + if not (config.cost) then return end + local cw = ctx.cost_warn_state + if config.cost.warn_at_dollars and not cw.dollars then + local cost = ctx:total_cost() + if cost >= config.cost.warn_at_dollars then + renderer.status(("session cost $%.6f has crossed warn_at_dollars=$%.6f") + :format(cost, config.cost.warn_at_dollars)) + cw.dollars = true + end + end + if config.cost.warn_at_tokens and not cw.tokens then + local p, c = ctx:total_tokens() + if (p + c) >= config.cost.warn_at_tokens then + renderer.status(("session tokens %d has crossed warn_at_tokens=%d") + :format(p + c, config.cost.warn_at_tokens)) + cw.tokens = true + end + end + end + -- Wrap broker.chat_stream with the Phase 5 fallback-retry path. -- Retries ONCE against cfg.routing.fallback_model (default "cloud") -- when (a) cfg.routing.fallback is true, (b) err matches a -- fallback-eligible pattern, AND (c) no deltas have arrived yet -- (mid-stream failures aren't retried — partial text would be -- duplicated). + -- + -- Phase 7 (R2): wrapped on_delta keys usage by payload.model + -- (set inside broker.lua from model_cfg.model — the + -- CALLER-INTENDED model name). When fallback fires, the broker + -- is called with fb_cfg, so payload.model is naturally the + -- fallback's model name — wrapper doesn't need to track + -- primary-vs-fallback itself. local function call_broker(model_cfg, model_name, msgs, on_delta, opts) local any_delta = false local wrapped = function(kind, payload) + if kind == "usage" then + _record_usage(payload.model, payload.category, payload) + return -- usage isn't forwarded to the underlying on_delta + end any_delta = true return on_delta(kind, payload) end @@ -939,7 +997,7 @@ function M.run(config) tool_calls_seen[#tool_calls_seen + 1] = payload end end, - { tools = tools_schema() }) + { tools = tools_schema(), category = "main" }) if rehydrator then local tail = rehydrator:flush() if tail ~= "" then @@ -1101,12 +1159,17 @@ function M.run(config) local sub_msgs = scrub_messages( { { role = "user", content = d.prompt } }, secrets_mode_for(sub_cfg)) - local sub_text, sub_err = broker.chat(sub_cfg, sub_msgs) + -- Phase 7: capture (text, usage) — second is err on failure. + local sub_text, second = broker.chat(sub_cfg, sub_msgs, + { category = "delegate" }) if not sub_text then - renderer.status(("delegate %s failed: %s"):format(d.preset, tostring(sub_err))) + renderer.status(("delegate %s failed: %s"):format(d.preset, tostring(second))) ctx:append_exec_output( - ("[delegate %s failed: %s]"):format(d.preset, tostring(sub_err))) + ("[delegate %s failed: %s]"):format(d.preset, tostring(second))) else + if second then -- usage payload + _record_usage(second.model, second.category, second) + end -- Rehydrate the reply so the model sees its own -- secrets restored when this gets re-serialized -- on the next ask_ai turn. @@ -1584,13 +1647,18 @@ function M.run(config) }, { role = "user", content = transcript }, }, secrets_mode_for(sum_cfg)) - local reply, err = broker.chat(sum_cfg, sum_msgs, - { max_tokens = 1024, timeout_ms = 90000 }) + -- Phase 7: capture (text, usage); second is err on failure. + local reply, second = broker.chat(sum_cfg, sum_msgs, + { max_tokens = 1024, timeout_ms = 90000, + category = "memory_summarize" }) if not reply then - renderer.status("summarize failed: " .. tostring(err)) + renderer.status("summarize failed: " .. tostring(second)) return end + if second then -- usage payload + _record_usage(second.model, second.category, second) + end if secrets_session then reply = secrets_session:rehydrate(reply) end @@ -2153,10 +2221,15 @@ function M.run(config) local sub_msgs = scrub_messages( { { role = "user", content = prompt } }, secrets_mode_for(sub_cfg)) - local sub_text, sub_err = broker.chat(sub_cfg, sub_msgs) + -- Phase 7: capture (text, usage); second is err on failure. + local sub_text, second = broker.chat(sub_cfg, sub_msgs, + { category = "delegate" }) if not sub_text then - renderer.status(("delegate %s failed: %s"):format(preset, tostring(sub_err))) + renderer.status(("delegate %s failed: %s"):format(preset, tostring(second))) else + if second then -- usage payload + _record_usage(second.model, second.category, second) + end if secrets_session then sub_text = secrets_session:rehydrate(sub_text) end