From 3e57824684487a3cad9ba5f547de3f66c0648a61 Mon Sep 17 00:00:00 2001
From: Markus Fritsche
Date: Wed, 13 May 2026 11:17:22 +0000
Subject: [PATCH] router: classify_model heuristic + 31-case corpus (Phase 5 commit #1)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 5 commit #1 per docs/PHASE5.md §11. Pure-Lua per-request model
routing — no IO, no LLM probe in v1.

router.classify_model(text, cfg) -> (model_name | nil, class_label):

  1. classify_class(text) walks heuristics in priority order:
     code class:
       - triple-backtick fence anywhere
       - "traceback" / "stacktrace" / "stack trace" (ci)
       - "error:" / "exception:" in first 60 chars (ci)
       - path-with-code-extension token
         (.py/.lua/.c/.js/.go/.rs/.cpp/.h/.ts)
       - 5+ lines with indented content (looks like a paste)
     reasoning class (requires text >= 15 chars to skip bare keywords):
       - "explain" / "why " / "how does" / "compare" (ci)
       - "?" + length > 100 chars
     default class: everything else
  2. Map class via cfg.routing.classes[class] → model name
     (or nil = keep current).
  3. Return (model_name_or_nil, class_label).

ALWAYS evaluates regardless of cfg.routing.auto — caller (repl.ask_ai in
commit #3) gates on the flag. This separation lets `:route check`
introspect the heuristic even when routing is off (N1).

M._classify_class exposed for testing.

Test corpus (test_router_model.lua, 31 cases):
  - 10 code-class positives (fence, traceback / stack trace,
    error: / exception: markers, paths, multi-line paste)
  - 7 reasoning-class positives (explain/why/how does/compare/?+length)
  - 10 default-class (short queries, bare keywords below 15-char
    threshold, non-code paths like .md/.txt)
  - 3 model-mapping cases (code→"deep", reasoning→"cloud", default→nil)
  - 1 R-N2 default test: classes.reasoning=nil → reasoning text yields
    nil model override (heuristic still fires, no swap)
  - All 31 pass; 15-char threshold catches "how does ASLR work?" without
    false-positive on bare "explain".
Co-Authored-By: Claude Opus 4.7 (1M context)
---
 router.lua            |  97 ++++++++++++++++++++++++++++++++++----
 test_router_model.lua | 107 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 196 insertions(+), 8 deletions(-)
 create mode 100644 test_router_model.lua

diff --git a/router.lua b/router.lua
index 6c9f7fc..e5157b2 100644
--- a/router.lua
+++ b/router.lua
@@ -1,12 +1,9 @@
--- router.lua — task classifier: meta / shell / AI.
--- See docs/PHASE0.md §5.
+-- router.lua — task classifier: meta / shell / AI / model-routing.
+-- See docs/PHASE0.md §5 and docs/PHASE5.md §4 for Phase 5 additions.
 --
--- Pure function. Takes (line, config). Returns (kind, payload).
--- kind : "meta" | "shell" | "ai"
--- payload: the (possibly stripped) line that the dispatcher should act on.
---
--- Empty / whitespace-only lines are returned as ("ai", ""); the repl loop
--- skips them before dispatching.
+-- M.classify(line, config) → (kind, payload) for input dispatch (Phase 0).
+-- M.classify_model(text, cfg) → (name | nil, class) for per-request model
+-- routing (Phase 5; pure-Lua heuristics, no IO).
 
 local M = {}
 
@@ -63,4 +60,88 @@ function M.classify(line, config)
   return "ai", line
 end
 
+-- ---------------------------------------------------------------- classify_model
+-- Phase 5: per-request model routing heuristic. Pure-Lua, no IO.
+-- Returns the NAME of a model preset (string) to switch to for this
+-- request (or nil to keep the active model unchanged), followed by
+-- the class label the heuristic picked.
+--
+-- The mapping from class to model name lives in `cfg.routing.classes`.
+-- A class with value `nil` means "keep current" — even though the
+-- heuristic fires, no override happens (used by default for the
+-- `reasoning` class per R-N2 cost-safety policy).
+--
+-- This function ALWAYS evaluates the heuristic regardless of
+-- `cfg.routing.auto` — the caller (repl.ask_ai) gates on the flag.
+-- This separation lets `:route check` introspect the heuristic
+-- even when routing is disabled (N1).
+
+-- File extensions that mark a path token as source code.
+local CODE_EXTENSIONS = {
+  py = true, lua = true, c = true, js = true, go = true,
+  rs = true, cpp = true, h = true, ts = true,
+}
+
+-- Classify `text` as "code", "reasoning" or "default".
+-- Heuristics run in priority order; the first hit wins.
+local function classify_class(text)
+  if type(text) ~= "string" or text == "" then return "default" end
+
+  -- ── Code class — looks like a paste or contains code markers
+  if text:find("```", 1, true) then return "code" end
+  local lower = text:lower()
+  if lower:find("traceback", 1, true)
+     or lower:find("stacktrace", 1, true)
+     or lower:find("stack trace", 1, true) then
+    return "code"
+  end
+  -- exception/error markers near beginning (first 60 chars)
+  local head = lower:sub(1, 60)
+  if head:find("error:", 1, true) or head:find("exception:", 1, true) then
+    return "code"
+  end
+  -- path with code-extension token. Every path-like token is checked, so
+  -- "/tmp/x.txt and ./main.lua" is still recognised via its second path.
+  for ext in lower:gmatch("[%./~][%w%-_/.]+%.(%w+)") do
+    if CODE_EXTENSIONS[ext] then return "code" end
+  end
+  -- multi-line + indented (looks like a code paste). A trailing newline
+  -- does not count as an extra line, and only spaces/tabs count as
+  -- indentation: `%s` would also match the "\n" of a blank line, turning
+  -- any prose with paragraph breaks into "code".
+  local _, newlines = text:gsub("\n", "\n")
+  local nlines = newlines + (text:sub(-1) == "\n" and 0 or 1)
+  if nlines > 4 and text:find("\n[ \t]+%S") then return "code" end
+
+  -- ── Reasoning class
+  -- Min length 15 — catches "how does X work" but excludes bare "why" / "explain"
+  if #text >= 15 then
+    if lower:find("explain", 1, true)
+       or lower:find("why ", 1, true) -- trailing space (not "whyever")
+       or lower:find("how does", 1, true)
+       or lower:find("compare", 1, true) then
+      return "reasoning"
+    end
+  end
+  if text:find("?", 1, true) and #text > 100 then
+    return "reasoning"
+  end
+
+  return "default"
+end
+
+-- Public API.
+-- Returns (model_name | nil, class_label). A nil model name means
+-- "keep current" (this is the R-N2 default for "reasoning").
+function M.classify_model(text, cfg)
+  local class = classify_class(text)
+  local classes = cfg and cfg.routing and cfg.routing.classes
+  if type(classes) ~= "table" then return nil, class end
+  -- Caller may want the class label for the status line; return both.
+  return classes[class], class
+end
+
+-- Exposed for `:route check` introspection (N1).
+M._classify_class = classify_class
+
 return M
diff --git a/test_router_model.lua b/test_router_model.lua
new file mode 100644
index 0000000..8236f99
--- /dev/null
+++ b/test_router_model.lua
@@ -0,0 +1,107 @@
+-- test_router_model.lua — Phase 5 commit #1 corpus for classify_model.
+-- Run from repo root: `luajit test_router_model.lua` (exit 0 on pass).
+
+package.path = "./?.lua;./vendor/?.lua;" .. package.path
+local router = require("router")
+
+local cfg = {
+  routing = {
+    auto = true,
+    classes = {
+      code = "deep",
+      reasoning = "cloud",
+      default = nil, -- nil → keep current
+    },
+  },
+}
+
+-- (text, expected_class)
+local CASES = {
+  -- ── code class
+  { "```python\ndef foo(): pass\n```", "code" },
+  { "Traceback (most recent call last):", "code" },
+  { "got a stack trace from my server", "code" },
+  { "error: undefined reference to `foo'", "code" },
+  { "exception: file not found", "code" },
+  { "please look at ./src/main.lua", "code" },
+  { "the issue is in ~/repos/foo/bar.py", "code" },
+  { "check /usr/lib/python3/dist-packages/x.py", "code" },
+  { "fix this:\n if x == 0:\n return\n else:\n pass\n", "code" },
+
+  -- ── reasoning class
+  { "Explain how MMAP works on Linux", "reasoning" },
+  { "why does my disk fill up so fast", "reasoning" },
+  { "how does ASLR work?", "reasoning" },
+  { "compare ZFS and btrfs in terms of snapshots", "reasoning" },
+  { "Can you explain the difference between a process and a thread in detail?", "reasoning" },
+  { "I have a long question with a question mark in it that goes well past one hundred characters does it route to reasoning?", "reasoning" },
+
+  -- ── default class — short queries, no markers
+  { "hi", "default" },
+  { "what time is it", "default" },
+  { "ls /tmp", "default" },
+  { "thanks", "default" },
+  { "explain", "default" }, -- bare "explain" is below the 15-char threshold
+  { "why", "default" }, -- bare "why"
+  { "?", "default" }, -- bare ?
+  { "hello world", "default" },
+
+  -- ── edge: ambiguous — prefer false-positive into reasoning over false-negative
+  { "How does it feel to be a robot? Just curious.", "reasoning" }, -- 45 chars + "how does"
+  -- ── edge: short error mention should still be code
+  { "got error: foo", "code" },
+
+  -- ── edge: a non-code path-like (e.g. README.md, .txt) should NOT match
+  { "see notes.md for details", "default" },
+  { "lookup /tmp/x.txt", "default" },
+}
+
+local pass, fail = 0, 0
+local fails = {}
+for i, c in ipairs(CASES) do
+  local _model, class = router.classify_model(c[1], cfg)
+  if class == c[2] then
+    pass = pass + 1
+  else
+    fail = fail + 1
+    fails[#fails+1] = string.format(" [%2d] text=%q expected=%s got=%s",
+      i, c[1]:sub(1, 60), c[2], tostring(class))
+  end
+end
+
+-- Failure details are printed once, together, after the TOTAL line below.
+print(string.format("router.classify_model: %d/%d pass", pass, pass+fail))
+
+-- Verify model routing: code → "deep", reasoning → "cloud", default → nil
+local cases_model = {
+  { "Traceback", "deep", "code" },
+  { "Explain in detail how X works", "cloud", "reasoning" },
+  { "hi", nil, "default" },
+}
+print()
+for _, c in ipairs(cases_model) do
+  local m, k = router.classify_model(c[1], cfg)
+  if m == c[2] and k == c[3] then
+    pass = pass + 1
+  else
+    fail = fail + 1
+    fails[#fails+1] = string.format(
+      " model: text=%q expected (%s,%s) got (%s,%s)",
+      c[1], tostring(c[2]), c[3], tostring(m), tostring(k))
+  end
+end
+
+-- Verify the R-N2 default: when classes.reasoning = nil, reasoning text → no override
+local cfg_safe = { routing = { auto = true, classes = { code = "deep", reasoning = nil } } }
+local m, k = router.classify_model("Explain in detail how X works", cfg_safe)
+if m == nil and k == "reasoning" then
+  pass = pass + 1
+else
+  fail = fail + 1
+  fails[#fails+1] = string.format(
+    " reasoning-nil-default: got (%s,%s); want (nil,reasoning)",
+    tostring(m), tostring(k))
+end
+
+print(string.format("\nTOTAL: %d/%d pass", pass, pass+fail))
+for _, f in ipairs(fails) do print(f) end
+os.exit(fail == 0 and 0 or 1)