router: classify_model heuristic + 31-case corpus (Phase 5 commit #1)
Phase 5 commit #1 per docs/PHASE5.md §11. Pure-Lua per-request model routing — no IO, no LLM probe in v1. router.classify_model(text, cfg) -> (model_name | nil, class_label): 1. classify_class(text) walks heuristics in priority order: code class: - triple-backtick fence anywhere - "traceback" / "stacktrace" / "stack trace" (ci) - "error:" / "exception:" in first 60 chars (ci) - path-with-code-extension token (.py/.lua/.c/.js/.go/.rs/.cpp/.h/.ts) - 5+ lines with indented content (looks like a paste) reasoning class (requires text >= 15 chars to skip bare keywords): - "explain" / "why " / "how does" / "compare" (ci) - "?" + length > 100 chars default class: everything else 2. Map class via cfg.routing.classes[class] → model name (or nil = keep current). 3. Return (model_name_or_nil, class_label). ALWAYS evaluates regardless of cfg.routing.auto — caller (repl.ask_ai in commit #3) gates on the flag. This separation lets `:route check` introspect the heuristic even when routing is off (N1). M._classify_class exposed for testing. Test corpus (test_router_model.lua, 31 cases): - 10 code-class positives (fence, traceback, stack trace, error:/exception:, paths, multi-line paste, short error mention) - 7 reasoning-class positives (explain/why/how does/compare/?+length, plus one ambiguous "how does" edge case) - 10 default-class (short queries, bare keywords below 15-char threshold, non-code paths like .md/.txt) - 3 model-mapping cases (code→"deep", reasoning→"cloud", default→nil) - 1 R-N2 default test: classes.reasoning=nil → reasoning text yields nil model override (heuristic still fires, no swap) - All 31 pass; 15-char threshold catches "how does ASLR work?" without false-positive on bare "explain". Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,108 @@
|
||||
-- test_router_model.lua — Phase 5 commit #1 corpus for classify_model.
|
||||
-- Run from repo root: `luajit test_router_model.lua` (exit 0 on pass).
|
||||
|
||||
package.path = "./?.lua;./vendor/?.lua;" .. package.path
|
||||
local router = require("router")
|
||||
|
||||
-- Routing config handed to router.classify_model by the corpus and
-- model-mapping checks. No "default" entry is set: a class without a mapping
-- yields a nil model, which the checks below treat as "keep current".
local cfg = {}
cfg.routing = { auto = true }
cfg.routing.classes = {
  code = "deep",
  reasoning = "cloud",
}
|
||||
|
||||
-- Classification corpus. Each entry is { text, expected_class }. Entries are
-- appended group by group, so the index printed for a failing case follows
-- the order written here.
local CASES = {}
do
  -- Append one { text, class } entry per text, preserving order.
  local function expect(class, texts)
    for _, text in ipairs(texts) do
      CASES[#CASES + 1] = { text, class }
    end
  end

  -- ── code class
  expect("code", {
    "```python\ndef foo(): pass\n```",
    "Traceback (most recent call last):",
    "got a stack trace from my server",
    "error: undefined reference to `foo'",
    "exception: file not found",
    "please look at ./src/main.lua",
    "the issue is in ~/repos/foo/bar.py",
    "check /usr/lib/python3/dist-packages/x.py",
    "fix this:\n if x == 0:\n return\n else:\n pass\n",
  })

  -- ── reasoning class
  expect("reasoning", {
    "Explain how MMAP works on Linux",
    "why does my disk fill up so fast",
    "how does ASLR work?",
    "compare ZFS and btrfs in terms of snapshots",
    "Can you explain the difference between a process and a thread in detail?",
    "I have a long question with a question mark in it that goes well past one hundred characters does it route to reasoning?",
  })

  -- ── default class — short queries, no markers
  expect("default", {
    "hi",
    "what time is it",
    "ls /tmp",
    "thanks",
    "explain",      -- bare keyword, too short to count as reasoning
    "why",          -- bare keyword, too short to count as reasoning
    "?",            -- a lone question mark
    "hello world",
  })

  -- ── edge: ambiguous — prefer false-positive into reasoning over false-negative
  expect("reasoning", {
    "How does it feel to be a robot? Just curious.",  -- contains "how does"
  })

  -- ── edge: short error mention should still be code
  expect("code", {
    "got error: foo",
  })

  -- ── edge: a non-code path-like (e.g. README.md, .txt) should NOT match
  expect("default", {
    "see notes.md for details",
    "lookup /tmp/x.txt",
  })
end
|
||||
|
||||
-- Run the classification corpus. Only the class label (the second value
-- returned by router.classify_model) is compared here; the model name is
-- covered by the model-mapping checks further down.
--
-- pass / fail are running counters shared with the later checks; `fails`
-- collects one formatted line per failing case.
local pass, fail = 0, 0
local fails = {}
for i, c in ipairs(CASES) do
  local _model, class = router.classify_model(c[1], cfg)
  if class == c[2] then
    pass = pass + 1
  else
    fail = fail + 1
    -- Truncate the text to 60 chars so long corpus entries stay on one line.
    fails[#fails+1] = string.format(" [%2d] text=%q expected=%s got=%s",
      i, c[1]:sub(1, 60), c[2], tostring(class))
  end
end

-- Corpus-only summary. The failure lines are deliberately NOT dumped here:
-- `fails` is printed in full after the TOTAL line at the end of the script,
-- so printing it at this point as well would list every corpus failure twice.
print(string.format("router.classify_model: %d/%d pass", pass, pass+fail))
|
||||
|
||||
-- Model-mapping checks: each row is { text, expected_model, expected_class }.
-- With `cfg`, code maps to "deep", reasoning maps to "cloud", and default has
-- no mapping, so a nil model is expected for it.
local cases_model = {
  { "Traceback", "deep", "code" },
  { "Explain in detail how X works", "cloud", "reasoning" },
  { "hi", nil, "default" },
}
print()
for idx = 1, #cases_model do
  local row = cases_model[idx]
  local text, want_model, want_class = row[1], row[2], row[3]
  local got_model, got_class = router.classify_model(text, cfg)
  if got_model ~= want_model or got_class ~= want_class then
    fail = fail + 1
    fails[#fails + 1] = (" model: text=%q expected (%s,%s) got (%s,%s)"):format(
      text, tostring(want_model), want_class,
      tostring(got_model), tostring(got_class))
  else
    pass = pass + 1
  end
end
|
||||
|
||||
-- R-N2 default: with no model mapped for the reasoning class, a reasoning
-- prompt must still be labelled "reasoning" while the returned model is nil
-- (no override).
local cfg_safe = {
  routing = {
    auto = true,
    classes = { code = "deep" },  -- reasoning intentionally left unmapped
  },
}
local m, k = router.classify_model("Explain in detail how X works", cfg_safe)
local no_override = (m == nil) and (k == "reasoning")
if not no_override then
  fail = fail + 1
  fails[#fails + 1] = (" reasoning-nil-default: got (%s,%s); want (nil,reasoning)"):format(
    tostring(m), tostring(k))
else
  pass = pass + 1
end
|
||||
|
||||
-- Final report: overall tally, then every recorded failure line, then exit
-- with status 0 only when nothing failed (1 otherwise).
local total = pass + fail
print(("\nTOTAL: %d/%d pass"):format(pass, total))
for idx = 1, #fails do
  print(fails[idx])
end

if fail == 0 then
  os.exit(0)
end
os.exit(1)
|
||||
Reference in New Issue
Block a user