broker + repl + safety: GBNF grammar-sampling passthrough (closes #88)
llama.cpp constrains the sampler to ONLY emit tokens matching a GBNF grammar. For small models this kills format drift at the token level — `CMD: <cmd>` is enforced by the sampler rather than hoped for via prompt discipline. Probe finding (this commit's pre-implementation): cloud (Anthropic via Bedrock) silently IGNORES the `grammar` field — returns normally via standard sampling. Default passthrough is safe for all routes; no per-model opt-in/opt-out needed in v1. Changes: - broker.lua build_request: `if opts.grammar then req.grammar = opts.grammar end`. Malformed grammar surfaces at request time via the existing transport-error path. - repl.lua ask_ai: `grammar_override = config.routing.grammars[req_class]` (same gating shape as #86's system_prompts override). Passed via opts.grammar in the call_broker invocation. - safety.lua is_destructive threads cfg.safety.probe_grammar through opts.grammar so llm_probe constrains the YES/NO output. Skips the regex-match dance entirely when the model can't drift. Caller-provided opts.grammar takes precedence over cfg. - config.lua gains two commented examples: * routing.grammars per class * safety.probe_grammar for the destructive probe 6 unit cases verified (stubbed curl.post_sse / broker.chat): - default: no grammar in body - opts.grammar -> body contains grammar JSON-encoded - safety probe_grammar reaches llm_probe via opts - no probe_grammar configured -> opts.grammar nil - caller opts.grammar takes precedence over cfg.safety.probe_grammar E2E against live local broker: - `routing.grammars.default = "root ::= \\"ACK\\""` configured; prompted "tell me a long story about a fox" -> model output EXACTLY "ACK" (sampler forced; would normally produce paragraphs). Grammar passthrough end-to-end confirmed. Regression: test_safety 87/87, test_router_model 31/31, repl loads. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -61,6 +61,14 @@ local function build_request(model_cfg, messages, stream, opts)
|
|||||||
if stream and opts.include_usage ~= false then
|
if stream and opts.include_usage ~= false then
|
||||||
req.stream_options = { include_usage = true }
|
req.stream_options = { include_usage = true }
|
||||||
end
|
end
|
||||||
|
-- #88: GBNF grammar passthrough. llama.cpp constrains the sampler
|
||||||
|
-- to only emit tokens matching the grammar — eliminates format
|
||||||
|
-- drift on small models. Probed cloud (Anthropic via Bedrock)
|
||||||
|
-- silently ignores the field, so default passthrough is safe;
|
||||||
|
-- no per-model opt-out needed in v1. Malformed grammar produces
|
||||||
|
-- a broker error at request time (visible via the usual transport
|
||||||
|
-- error path).
|
||||||
|
if opts.grammar then req.grammar = opts.grammar end
|
||||||
return url, json.encode(req), build_headers(model_cfg),
|
return url, json.encode(req), build_headers(model_cfg),
|
||||||
(model_cfg.timeout_ms or 60000)
|
(model_cfg.timeout_ms or 60000)
|
||||||
end
|
end
|
||||||
|
|||||||
+23
@@ -260,6 +260,29 @@ return {
|
|||||||
-- Do not ask clarifying questions.]],
|
-- Do not ask clarifying questions.]],
|
||||||
-- -- reasoning routes to cloud; no override usually needed
|
-- -- reasoning routes to cloud; no override usually needed
|
||||||
-- },
|
-- },
|
||||||
|
--
|
||||||
|
-- -- Issue #88: per-class GBNF grammar passthrough. llama.cpp
|
||||||
|
-- -- constrains the sampler to ONLY emit tokens matching the
|
||||||
|
-- -- grammar — eliminates format drift on small models. Cloud
|
||||||
|
-- -- (Anthropic/Bedrock) silently ignores the field, so default
|
||||||
|
-- -- passthrough is safe; no per-model opt-out needed. Malformed
|
||||||
|
-- -- grammar surfaces as a broker error at request time.
|
||||||
|
-- grammars = {
|
||||||
|
-- code = [[root ::= "CMD: " [^\n]+ "\n"]],
|
||||||
|
-- default = [[root ::= ("CMD: " [^\n]+ "\n") | [^\n]+ "\n"]],
|
||||||
|
-- },
|
||||||
|
-- },
|
||||||
|
--
|
||||||
|
-- Issue #88 (continued): for the safety LLM probe (YES/NO
|
||||||
|
-- destructive classification), set safety.probe_grammar to force
|
||||||
|
-- the probe model to emit exactly YES or NO. Eliminates the
|
||||||
|
-- regex-match fallback for unparseable verdicts; small models
|
||||||
|
-- become reliable enough to use as the probe.
|
||||||
|
--
|
||||||
|
-- safety = {
|
||||||
|
-- llm_second_opinion = true,
|
||||||
|
-- llm_model = "fast",
|
||||||
|
-- probe_grammar = [[root ::= ("YES" | "NO")]],
|
||||||
-- },
|
-- },
|
||||||
|
|
||||||
-- ── Phase 5 context summarization on sliding-window eviction.
|
-- ── Phase 5 context summarization on sliding-window eviction.
|
||||||
|
|||||||
@@ -999,6 +999,14 @@ function M.run(config)
|
|||||||
and config.routing.system_prompts
|
and config.routing.system_prompts
|
||||||
and req_class
|
and req_class
|
||||||
and config.routing.system_prompts[req_class]
|
and config.routing.system_prompts[req_class]
|
||||||
|
-- #88: per-class GBNF grammar passthrough. llama.cpp constrains
|
||||||
|
-- the sampler to only emit tokens matching the grammar — kills
|
||||||
|
-- format drift on small models. Cloud silently ignores the
|
||||||
|
-- field (probed Anthropic/Bedrock returns normally).
|
||||||
|
local grammar_override = config.routing
|
||||||
|
and config.routing.grammars
|
||||||
|
and req_class
|
||||||
|
and config.routing.grammars[req_class]
|
||||||
|
|
||||||
local depth = 0
|
local depth = 0
|
||||||
local final_resp = ""
|
local final_resp = ""
|
||||||
@@ -1030,7 +1038,8 @@ function M.run(config)
|
|||||||
tool_calls_seen[#tool_calls_seen + 1] = payload
|
tool_calls_seen[#tool_calls_seen + 1] = payload
|
||||||
end
|
end
|
||||||
end,
|
end,
|
||||||
{ tools = tools_schema(), category = "main" })
|
{ tools = tools_schema(), category = "main",
|
||||||
|
grammar = grammar_override })
|
||||||
if rehydrator then
|
if rehydrator then
|
||||||
local tail = rehydrator:flush()
|
local tail = rehydrator:flush()
|
||||||
if tail ~= "" then
|
if tail ~= "" then
|
||||||
|
|||||||
+23
-2
@@ -192,8 +192,14 @@ local function llm_probe(model_cfg, system, cmd, opts)
|
|||||||
-- accumulator so :cost detail surfaces probe spend separately.
|
-- accumulator so :cost detail surfaces probe spend separately.
|
||||||
-- broker.chat returns (text, usage) on success; capture as
|
-- broker.chat returns (text, usage) on success; capture as
|
||||||
-- (reply, second) and branch on reply nil-ness.
|
-- (reply, second) and branch on reply nil-ness.
|
||||||
|
-- #88: optional GBNF grammar passthrough to constrain the probe
|
||||||
|
-- model's output to exactly the YES/NO tokens we'll accept. Set
|
||||||
|
-- via opts.grammar (caller forwards cfg.safety.probe_grammar).
|
||||||
|
-- Cloud silently ignores; local llama.cpp enforces. Skips the
|
||||||
|
-- regex match dance entirely when the model can't drift.
|
||||||
local reply, second = broker.chat(model_cfg, msgs,
|
local reply, second = broker.chat(model_cfg, msgs,
|
||||||
{ max_tokens = 4, timeout_ms = PROBE_TIMEOUT_MS, category = "probe" })
|
{ max_tokens = 4, timeout_ms = PROBE_TIMEOUT_MS, category = "probe",
|
||||||
|
grammar = opts and opts.grammar })
|
||||||
if not reply then
|
if not reply then
|
||||||
-- Broker failure → safe default: treat as YES (destructive)
|
-- Broker failure → safe default: treat as YES (destructive)
|
||||||
return "YES_FAILSAFE", second
|
return "YES_FAILSAFE", second
|
||||||
@@ -293,7 +299,22 @@ function M.is_destructive(cmd, cfg, opts)
|
|||||||
return false, nil
|
return false, nil
|
||||||
end
|
end
|
||||||
|
|
||||||
return llm_second_opinion(cmd, cfg, opts)
|
-- #88: thread cfg.safety.probe_grammar through opts.grammar so
|
||||||
|
-- llm_probe can constrain the model's output to YES/NO. Cloud
|
||||||
|
-- ignores; local llama.cpp enforces. Existing caller opts
|
||||||
|
-- (scrub_msgs, rehydrate, on_usage) preserved.
|
||||||
|
local merged_opts = opts and {} or nil
|
||||||
|
if opts then
|
||||||
|
for k, v in pairs(opts) do merged_opts[k] = v end
|
||||||
|
end
|
||||||
|
if cfg.safety and cfg.safety.probe_grammar then
|
||||||
|
merged_opts = merged_opts or {}
|
||||||
|
if not merged_opts.grammar then
|
||||||
|
merged_opts.grammar = cfg.safety.probe_grammar
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
return llm_second_opinion(cmd, cfg, merged_opts)
|
||||||
end
|
end
|
||||||
|
|
||||||
-- Expose the pattern table for `:safety patterns` meta and for testing.
|
-- Expose the pattern table for `:safety patterns` meta and for testing.
|
||||||
|
|||||||
Reference in New Issue
Block a user