diff --git a/broker.lua b/broker.lua
index 2527200..290a113 100644
--- a/broker.lua
+++ b/broker.lua
@@ -61,6 +61,14 @@ local function build_request(model_cfg, messages, stream, opts)
     if stream and opts.include_usage ~= false then
         req.stream_options = { include_usage = true }
     end
+    -- #88: GBNF grammar passthrough. llama.cpp constrains the sampler
+    -- to only emit tokens matching the grammar — eliminates format
+    -- drift on small models. The cloud backend we probed (Anthropic via
+    -- Bedrock) silently ignores the field, so default passthrough is
+    -- safe; no per-model opt-out needed in v1. A malformed grammar
+    -- produces a broker error at request time (visible via the usual
+    -- transport error path).
+    if opts.grammar then req.grammar = opts.grammar end
     return url, json.encode(req), build_headers(model_cfg),
            (model_cfg.timeout_ms or 60000)
 end
diff --git a/config.lua b/config.lua
index d81f911..58bc39a 100644
--- a/config.lua
+++ b/config.lua
@@ -260,6 +260,29 @@ return {
     -- Do not ask clarifying questions.]],
     --         -- reasoning routes to cloud; no override usually needed
     --     },
+    --
+    --     -- Issue #88: per-class GBNF grammar passthrough. llama.cpp
+    --     -- constrains the sampler to ONLY emit tokens matching the
+    --     -- grammar — eliminates format drift on small models. Cloud
+    --     -- (Anthropic/Bedrock) silently ignores the field, so default
+    --     -- passthrough is safe; no per-model opt-out needed. A malformed
+    --     -- grammar surfaces as a broker error at request time.
+    --     grammars = {
+    --         code    = [[root ::= "CMD: " [^\n]+ "\n"]],
+    --         default = [[root ::= ("CMD: " [^\n]+ "\n") | [^\n]+ "\n"]],
+    --     },
+    -- },
+    --
+    -- Issue #88 (continued): for the safety LLM probe (YES/NO
+    -- destructive classification), set safety.probe_grammar to force
+    -- the probe model to emit exactly YES or NO. Eliminates the
+    -- regex-match fallback for unparseable verdicts; small models
+    -- become reliable enough to use as the probe.
+    --
+    -- safety = {
+    --     llm_second_opinion = true,
+    --     llm_model          = "fast",
+    --     probe_grammar      = [[root ::= ("YES" | "NO")]],
     -- },
 
     -- ── Phase 5 context summarization on sliding-window eviction.
diff --git a/repl.lua b/repl.lua
index dcf5d79..fdbdd1a 100644
--- a/repl.lua
+++ b/repl.lua
@@ -999,6 +999,14 @@ function M.run(config)
                          and config.routing.system_prompts
                          and req_class
                          and config.routing.system_prompts[req_class]
+        -- #88: per-class GBNF grammar passthrough. llama.cpp constrains
+        -- the sampler to only emit tokens matching the grammar — kills
+        -- format drift on small models. Cloud silently ignores the field
+        -- (a probe against Anthropic/Bedrock returned normally).
+        local grammar_override = config.routing
+                             and config.routing.grammars
+                             and req_class
+                             and config.routing.grammars[req_class]
 
         local depth = 0
         local final_resp = ""
@@ -1030,7 +1038,8 @@ function M.run(config)
                         tool_calls_seen[#tool_calls_seen + 1] = payload
                     end
                 end,
-                { tools = tools_schema(), category = "main" })
+                { tools = tools_schema(), category = "main",
+                  grammar = grammar_override })
             if rehydrator then
                 local tail = rehydrator:flush()
                 if tail ~= "" then
diff --git a/safety.lua b/safety.lua
index 4076c25..b8f3e4f 100644
--- a/safety.lua
+++ b/safety.lua
@@ -192,8 +192,14 @@ local function llm_probe(model_cfg, system, cmd, opts)
     -- accumulator so :cost detail surfaces probe spend separately.
     -- broker.chat returns (text, usage) on success; capture as
     -- (reply, second) and branch on reply nil-ness.
+    -- #88: optional GBNF grammar passthrough to constrain the probe
+    -- model's output to exactly the YES/NO tokens we'll accept. Set
+    -- via opts.grammar (caller forwards cfg.safety.probe_grammar).
+    -- Cloud silently ignores the field; local llama.cpp enforces it,
+    -- so a constrained model cannot drift into an unparseable verdict.
     local reply, second = broker.chat(model_cfg, msgs,
-        { max_tokens = 4, timeout_ms = PROBE_TIMEOUT_MS, category = "probe" })
+        { max_tokens = 4, timeout_ms = PROBE_TIMEOUT_MS, category = "probe",
+          grammar = opts and opts.grammar })
     if not reply then
         -- Broker failure → safe default: treat as YES (destructive)
         return "YES_FAILSAFE", second
@@ -293,7 +299,22 @@ function M.is_destructive(cmd, cfg, opts)
         return false, nil
     end
 
-    return llm_second_opinion(cmd, cfg, opts)
+    -- #88: thread cfg.safety.probe_grammar through opts.grammar so
+    -- llm_probe can constrain output to YES/NO (cloud ignores it; local
+    -- llama.cpp enforces). opts is shallow-copied, so caller fields
+    -- (scrub_msgs, rehydrate, on_usage) survive; caller grammar wins.
+    local merged_opts = opts and {} or nil
+    if opts then
+        for k, v in pairs(opts) do merged_opts[k] = v end
+    end
+    if cfg.safety and cfg.safety.probe_grammar then
+        merged_opts = merged_opts or {}
+        if not merged_opts.grammar then
+            merged_opts.grammar = cfg.safety.probe_grammar
+        end
+    end
+
+    return llm_second_opinion(cmd, cfg, merged_opts)
 end
 
 -- Expose the pattern table for `:safety patterns` meta and for testing.