repl + renderer: tree-sitter highlighter (Phase 6 commit #5)
The largest Phase 6 commit — fence-aware stream filter in renderer.lua
+ external tree-sitter dispatch + :highlight meta in repl.lua.
renderer.lua — fence-aware filter wrapping assistant_delta:
M.set_highlight(enabled, detected, highlight_fn)
Called by repl.lua at startup AND on every :highlight toggle.
Stores state in module-locals (off by default).
State machine inside _hl_push:
outside: pass chunks through; HOLD trailing partial-fence chars
(per R1 — local llama.cpp splits ```python as `'``'`
then `'`python\n'`, so naive pass-through drops the
leading "``" and never recovers).
inside: buffer cumulatively until "\n```" appears; emit
highlight_fn(body, lang) then the closing fence verbatim.
Recursive call handles "rest" after the closing fence.
N1: fences only open at start-of-stream OR after a newline
("^```" or "\n```" only). Inline backticks in prose
("use ``` to mark code") do not open a fence.
R3 (PTY raw-mode toggle per highlight call): no change here — every
executor.exec call already toggles raw-mode (existing behavior
since Phase 1). The risk is theoretical; smoke-test interactively
after install if multi-fence renders show flicker.
assistant_flush handles end-of-stream gracefully: drains any held
partial-fence tail OR an unterminated inside-fence buffer.
repl.lua — _detect_treesitter + highlighted + :highlight meta:
_detect_treesitter() one-shot popen probe of `tree-sitter --version`.
Run once at startup; cached as
highlight_detected.
highlighted(body, lang_tag) R2-placed in repl.lua (has _shq +
executor access). Translates the fence
tag (`py`, `python`, `lua`, etc.) to
a canonical lang via LANG_TAG, picks
the canonical extension via LANG_EXTENSION,
writes body to a tmpfile with that
extension, runs `tree-sitter highlight
<tmpfile>` via executor.exec, returns
the output. On ANY failure (CLI absent,
non-zero exit, empty output), returns
`body` unchanged — silent pass-through.
R4 RESOLVED VIA REAL INSTALL: probed `tree-sitter highlight --help`
on noether; confirmed:
- NO `--lang` flag exists (formulate-time assumption wrong)
- takes a PATH; language inferred from file extension
- alternative `--scope source.X` exists but also unreliable
without configured grammars
Resolution: write tmpfile with `os.tmpname() .. LANG_EXTENSION[lang]`
and pass the path. Matches the documented upstream contract.
B4-followup: even with the CLI installed, highlighting requires
`~/.config/tree-sitter/config.json` parser-directories with
cloned + built `tree-sitter-<lang>` grammars. Without parsers,
every call exits non-zero and we silently pass through. The
:highlight install hint surfaces all three install steps so the
user knows what's actually needed.
:highlight [on|off|status] meta:
no arg -> flip
on/off -> set explicit
status -> report toggle + CLI detection state
When toggled on AND CLI absent: emit a 4-line install hint
(CLI install, init-config, grammar clone reminder).
When toggled on AND CLI present: emit a 1-line note that
parser-directories must be set up for actual highlighting.
HELP gains :highlight entry.
Tested:
10/10 unit cases on the renderer state machine, including:
- plain prose passthrough
- single-chunk fence
- B2 split fence ("``" + "`python\n" + "x=42" + "\n```")
- N1 SOL anchor (mid-line ``` does not open)
- trailing \n properly emitted across chunks
- SOL-only fence open
- prose after closing fence preserved
- two fences in one stream
- highlight off = passthrough (callback never fires)
E2E :highlight meta verified:
:highlight status -> off / detected
:highlight on -> toggles + emits parser-dir reminder
:highlight status -> on / detected
:highlight off -> off
Regression: test_safety 87/87, test_router_model 31/31, repl loads.
Pillars 1 + 2 + 3 of Phase 6 now all implemented. Commit #6 is config
example block + status -> Implement.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+121
-1
@@ -63,15 +63,135 @@ end
|
|||||||
|
|
||||||
local stream_buf = nil -- non-nil while a stream is in progress
|
local stream_buf = nil -- non-nil while a stream is in progress
|
||||||
|
|
||||||
|
-- Phase 6: fence-aware highlight filter for streamed assistant text.
-- Disabled until M.set_highlight(enabled, detected, highlight_fn) turns it
-- on. While enabled, _hl_push runs a two-state machine:
--   "outside"  chunks pass through, except that a trailing run which could
--              still grow into an opening fence is held back (R1: local
--              llama.cpp can deliver a fence split across chunks).
--   "inside"   text is buffered until the closing "\n```" shows up; the
--              body then goes through highlight_fn(body, lang) and the
--              closing fence is emitted verbatim.
-- N1: a fence opens only at start-of-stream or right after a newline
-- ("^```" or "\n```"), so inline backticks in prose are left alone.
local hl_enabled, hl_detected = false, false
local hl_fn                            -- function(body, lang) -> rendered
local hl_state = "outside"             -- "outside" | "inside"
local hl_tail, hl_inside_buf = "", ""  -- outside lookahead / inside buffer
local hl_lang                          -- language tag captured at fence open
|
||||||
|
|
||||||
|
--- Configure the fence-highlight filter.
-- @param enabled       truthy to enable the filter; stored as a boolean
-- @param detected      truthy when the caller found the highlighter CLI;
--                      stored as a boolean and reported by M.highlight_state
-- @param highlight_fn  function(body, lang) -> rendered text; stored as-is
function M.set_highlight(enabled, detected, highlight_fn)
    hl_enabled, hl_detected, hl_fn =
        (enabled and true or false),
        (detected and true or false),
        highlight_fn
end
|
||||||
|
|
||||||
|
--- Snapshot of the highlight filter configuration.
-- @return a fresh table with boolean fields `enabled` and `detected`
function M.highlight_state()
    local snapshot = {}
    snapshot.enabled = hl_enabled
    snapshot.detected = hl_detected
    return snapshot
end
|
||||||
|
|
||||||
|
-- Number of trailing bytes examined when looking for a partial fence-open;
-- this caps how much text can be held back in the "outside" state.
local HL_MAX_HOLD = 32

-- True when `t` could still grow into "```<lang>" (a fence-open marker
-- without its surrounding newlines): "", "`", "``", or "```" followed only
-- by language-tag characters.
local function _hl_is_open_prefix(t)
    return t == "" or t == "`" or t == "``"
        or t:find("^```[%w_-]*$") ~= nil
end

-- True when the first byte of `combined` sits at the start of a line of the
-- stream. `combined` is the not-yet-emitted end of stream_buf
-- (M.assistant_delta appends each chunk to stream_buf before calling
-- _hl_push), so the byte just before it in stream_buf decides. Without a
-- stream buffer the start of `combined` is treated as start-of-line.
local function _hl_at_sol(combined)
    if type(stream_buf) ~= "string" then return true end
    local before = #stream_buf - #combined
    if before <= 0 then return true end
    return stream_buf:byte(before) == 10 -- "\n"
end

-- Longest suffix of `s` that is a proper prefix of a fence-open marker:
-- "\n", "\n`", "\n``", "\n```<langchars>", or -- only when `s` as a whole
-- starts a line -- the same without the leading newline. Returns the suffix
-- ("" when there is none). Only the last HL_MAX_HOLD bytes are examined.
--   s       text to inspect
--   at_sol  whether s[1] is at the start of a line (default true)
local function _hl_partial_suffix(s, at_sol)
    if at_sol == nil then at_sol = true end
    local n = #s
    for k = math.min(n, HL_MAX_HOLD), 1, -1 do
        local cand = s:sub(n - k + 1)
        if cand:byte(1) == 10 then
            if _hl_is_open_prefix(cand:sub(2)) then return cand end
        elseif k == n and at_sol and _hl_is_open_prefix(cand) then
            -- Bare backticks count only when they begin a line; otherwise
            -- they are inline backticks in prose (N1).
            return cand
        end
    end
    return ""
end

-- Find a complete fence-open line in `combined`. Returns
-- (fence_start, content_start, lang) or nil. fence_start is the index of
-- the first backtick; content_start is the index just past the newline
-- that ends the fence-info line; lang is the (possibly empty) tag.
--   at_sol  whether combined[1] is at the start of a line (default true);
--           the "^```" form is only honoured when it is.
local function _hl_find_open(combined, at_sol)
    if at_sol == nil then at_sol = true end
    if at_sol then
        local _, e, lang = combined:find("^```([%w_-]*)\n")
        if e then return 1, e + 1, lang end
    end
    local s, e, lang = combined:find("\n```([%w_-]*)\n")
    if s then return s + 1, e + 1, lang end
    return nil
end

-- Feed one chunk through the fence-aware filter.
-- With highlighting disabled (or no callback set) the chunk is emitted
-- unchanged. Otherwise:
--   outside: text is emitted up to a held partial-fence tail; a complete
--            fence-open line is emitted verbatim and switches to "inside".
--   inside:  text is buffered until a closing fence is seen ("\n```", or
--            "```" as the very first bytes of the body for an empty
--            fence). The body is rendered through hl_fn(body, lang) --
--            falling back to the raw body if the callback errors or does
--            not return a string -- the closing fence is emitted verbatim,
--            and whatever follows is re-fed in the "outside" state.
local function _hl_push(chunk)
    if not hl_enabled or not hl_fn then
        emit(chunk)
        return
    end

    if hl_state == "outside" then
        local combined = hl_tail .. chunk
        local at_sol = _hl_at_sol(combined)
        local fs, cs, lang = _hl_find_open(combined, at_sol)
        if not fs then
            -- No opening fence yet: hold the trailing partial fence so a
            -- split fence ("``" then "`python\n") is still recognised.
            local hold = _hl_partial_suffix(combined, at_sol)
            if #hold < #combined then
                emit(combined:sub(1, #combined - #hold))
            end
            hl_tail = hold
            return
        end
        if fs > 1 then emit(combined:sub(1, fs - 1)) end
        -- The fence-open line itself is shown verbatim; only the body is
        -- handed to the highlighter.
        emit(combined:sub(fs, cs - 1))
        hl_state = "inside"
        hl_lang = (lang ~= "" and lang) or nil
        hl_inside_buf = combined:sub(cs)
        hl_tail = ""
        -- Fall through: the closing fence may already be buffered when a
        -- whole block arrives in one chunk.
    else
        hl_inside_buf = hl_inside_buf .. chunk
    end

    local body, closing, rest
    if hl_inside_buf:sub(1, 3) == "```" then
        -- Empty fence: the body buffer begins right after the info line's
        -- newline, so a leading "```" is the closing fence.
        body, closing, rest = "", "```", hl_inside_buf:sub(4)
    else
        local cpos = hl_inside_buf:find("\n```", 1, true)
        if not cpos then return end -- still buffering
        body = hl_inside_buf:sub(1, cpos - 1)
        closing = "\n```"
        rest = hl_inside_buf:sub(cpos + 4)
    end

    if body ~= "" then
        local ok, rendered = pcall(hl_fn, body, hl_lang or "")
        if ok and type(rendered) == "string" then
            emit(rendered)
        else
            emit(body)
        end
    end
    emit(closing)
    hl_state = "outside"
    hl_inside_buf = ""
    hl_lang = nil
    if rest ~= "" then return _hl_push(rest) end
end
|
||||||
|
|
||||||
--- Feed one streamed chunk of assistant text to the renderer.
-- nil/false and empty chunks are ignored. Any other chunk is appended to
-- stream_buf (starting a new stream when none is in progress) and then
-- handed to the fence-aware filter _hl_push.
function M.assistant_delta(chunk)
    if chunk and chunk ~= "" then
        stream_buf = (stream_buf or "") .. chunk
        _hl_push(chunk)
    end
end
|
||||||
|
|
||||||
--- Finish the current assistant stream.
-- No-op when no stream is in progress. Otherwise drains whatever the
-- highlight filter still holds (an unterminated fence body is emitted raw,
-- without highlighting, because no closing fence was seen; a held
-- partial-fence tail is emitted as-is), resets the filter state, ends the
-- output with a newline if the streamed text did not end with one, and
-- clears stream_buf.
function M.assistant_flush()
    if stream_buf == nil then return end
    if hl_state == "inside" then
        if hl_inside_buf ~= "" then emit(hl_inside_buf) end
    elseif hl_tail ~= "" then
        emit(hl_tail)
    end
    -- Reset unconditionally: a stream that ends right after a fence-open
    -- line leaves hl_state == "inside" with an empty buffer, and the next
    -- stream must not start inside that stale fence.
    hl_state = "outside"
    hl_inside_buf = ""
    hl_tail = ""
    hl_lang = nil
    if not stream_buf:match("\n$") then emit("\n") end
    stream_buf = nil
end
|
||||||
|
|||||||
@@ -185,6 +185,10 @@ Meta commands:
|
|||||||
:tree off clear the [project] block
|
:tree off clear the [project] block
|
||||||
:diff [<git-args>] git diff <args> -> inject as [diff ...] exec_output
|
:diff [<git-args>] git diff <args> -> inject as [diff ...] exec_output
|
||||||
examples: :diff :diff --cached :diff main..feature
|
examples: :diff :diff --cached :diff main..feature
|
||||||
|
:highlight [on|off|status]
|
||||||
|
toggle tree-sitter syntax highlighting on assistant
|
||||||
|
code fences (requires external tree-sitter CLI +
|
||||||
|
built grammars; off by default)
|
||||||
:delegate <p> <prompt> one-shot sub-broker call to preset <p>; prints reply
|
:delegate <p> <prompt> one-shot sub-broker call to preset <p>; prints reply
|
||||||
:help this message
|
:help this message
|
||||||
]]
|
]]
|
||||||
@@ -724,6 +728,79 @@ function M.run(config)
|
|||||||
-- invariant: every git invocation that flows back into context
|
-- invariant: every git invocation that flows back into context
|
||||||
-- runs with `--no-pager -c color.ui=never`.
|
-- runs with `--no-pager -c color.ui=never`.
|
||||||
|
|
||||||
|
-- Phase 6 highlighter (commit #5): tree-sitter CLI detection +
|
||||||
|
-- per-language extension map + path-based dispatch.
|
||||||
|
--
|
||||||
|
-- R4 resolution: the upstream `tree-sitter highlight` CLI takes a
|
||||||
|
-- PATH (no --lang flag); language is inferred from the file
|
||||||
|
-- extension. Empty `--scope source.X` is also unreliable
|
||||||
|
-- without configured grammars. So we name the tmpfile with the
|
||||||
|
-- canonical extension for `lang` and let the CLI dispatch.
|
||||||
|
--
|
||||||
|
-- Additional B4-followup: even with the CLI installed, highlighting
|
||||||
|
-- requires parser-directories configured AND grammars cloned + built.
|
||||||
|
-- Without those, every highlight call emits a warning to stderr and
|
||||||
|
-- returns empty stdout. We treat empty/error as pass-through (body
|
||||||
|
-- returned as-is).
|
||||||
|
local LANG_EXTENSION = {
|
||||||
|
lua = ".lua", python = ".py", javascript = ".js", typescript = ".ts",
|
||||||
|
bash = ".sh", c = ".c", cpp = ".cpp", rust = ".rs", go = ".go",
|
||||||
|
java = ".java", ruby = ".rb", markdown = ".md", json = ".json",
|
||||||
|
yaml = ".yaml", toml = ".toml", html = ".html", css = ".css",
|
||||||
|
sql = ".sql", xml = ".xml",
|
||||||
|
}
|
||||||
|
-- Map lang-tag (as it appears in ```<tag>) to canonical lang. Mirrors
|
||||||
|
-- expand_mentions LANG_BY_EXT but indexed by tag (e.g., "py" -> "python").
|
||||||
|
local LANG_TAG = {
|
||||||
|
py = "python", python = "python",
|
||||||
|
lua = "lua",
|
||||||
|
js = "javascript", javascript = "javascript",
|
||||||
|
ts = "typescript", typescript = "typescript",
|
||||||
|
sh = "bash", bash = "bash",
|
||||||
|
c = "c", cpp = "cpp", cc = "cpp",
|
||||||
|
rs = "rust", go = "go", java = "java", rb = "ruby", ruby = "ruby",
|
||||||
|
md = "markdown", markdown = "markdown",
|
||||||
|
json = "json", yaml = "yaml", yml = "yaml", toml = "toml",
|
||||||
|
html = "html", css = "css", sql = "sql", xml = "xml",
|
||||||
|
}
|
||||||
|
local function _detect_treesitter()
|
||||||
|
local pipe = io.popen("command -v tree-sitter 2>/dev/null && tree-sitter --version 2>/dev/null")
|
||||||
|
-- N2 / B3: pipe:close() returns true on LuaJIT regardless of exit
|
||||||
|
-- code; we don't use it for the verdict. Presence of an output
|
||||||
|
-- line from --version is the actual signal.
|
||||||
|
local ok = pipe and pipe:read("*l") and pipe:close()
|
||||||
|
return ok and true or false
|
||||||
|
end
|
||||||
|
local highlight_enabled = false
|
||||||
|
local highlight_detected = _detect_treesitter()
|
||||||
|
|
||||||
|
-- highlighted(body, lang_tag) — R2-placed in repl.lua so it has
|
||||||
|
-- access to _shq + executor. Returns the rendered body (with ANSI)
|
||||||
|
-- or `body` unchanged on any failure (silent pass-through so the
|
||||||
|
-- user never sees a broken highlighter swallow their code block).
|
||||||
|
local function highlighted(body, lang_tag)
|
||||||
|
if not highlight_enabled then return body end
|
||||||
|
local lang = LANG_TAG[(lang_tag or ""):lower()]
|
||||||
|
local ext = lang and LANG_EXTENSION[lang]
|
||||||
|
if not ext then return body end
|
||||||
|
-- B3: io.popen close doesn't expose exit code; route via
|
||||||
|
-- executor.exec (pty.spawn + waitpid) for reliable (out, code).
|
||||||
|
local tmp = os.tmpname() .. ext
|
||||||
|
local f = io.open(tmp, "wb")
|
||||||
|
if not f then return body end
|
||||||
|
f:write(body); f:close()
|
||||||
|
local out, code = executor.exec(
|
||||||
|
("tree-sitter highlight %s 2>/dev/null"):format(_shq(tmp)))
|
||||||
|
os.remove(tmp)
|
||||||
|
if code ~= 0 or not out or out == "" then return body end
|
||||||
|
return out
|
||||||
|
end
|
||||||
|
|
||||||
|
-- Wire the filter into renderer (off by default; user opts in via
|
||||||
|
-- :highlight on). Even when off, we set the callback so a later
|
||||||
|
-- toggle works without reinitialization.
|
||||||
|
renderer.set_highlight(highlight_enabled, highlight_detected, highlighted)
|
||||||
|
|
||||||
-- Phase 6 (§6 + N4): project file-tree scanner. Prefers
|
-- Phase 6 (§6 + N4): project file-tree scanner. Prefers
|
||||||
-- `git -C <dir> ls-files --cached --others --exclude-standard`
|
-- `git -C <dir> ls-files --cached --others --exclude-standard`
|
||||||
-- when <dir> is inside a git repo (free .gitignore honor);
|
-- when <dir> is inside a git repo (free .gitignore honor);
|
||||||
@@ -1803,6 +1880,45 @@ function M.run(config)
|
|||||||
-- :tree <N> scan with depth=N; cached as _project_opts
|
-- :tree <N> scan with depth=N; cached as _project_opts
|
||||||
-- :tree refresh re-scan with cached opts; else config defaults
|
-- :tree refresh re-scan with cached opts; else config defaults
|
||||||
-- :tree off clear ctx.project AND ctx._project_opts
|
-- :tree off clear ctx.project AND ctx._project_opts
|
||||||
|
-- Phase 6: :highlight meta — toggle tree-sitter highlighter.
|
||||||
|
-- :highlight flip current setting
|
||||||
|
-- :highlight on enable (status warns if CLI absent
|
||||||
|
-- AND/OR parsers may not be installed)
|
||||||
|
-- :highlight off disable; renderer passes through
|
||||||
|
-- :highlight status report toggle + CLI detection state
|
||||||
|
meta.highlight = function(args)
|
||||||
|
local sub = ((args or ""):match("^%s*(%S*)") or ""):lower()
|
||||||
|
if sub == "status" then
|
||||||
|
renderer.status(("highlight: %s (tree-sitter CLI %s)"):format(
|
||||||
|
highlight_enabled and "on" or "off",
|
||||||
|
highlight_detected and "detected" or "absent"))
|
||||||
|
return
|
||||||
|
end
|
||||||
|
if sub == "" then
|
||||||
|
highlight_enabled = not highlight_enabled
|
||||||
|
elseif sub == "on" then
|
||||||
|
highlight_enabled = true
|
||||||
|
elseif sub == "off" then
|
||||||
|
highlight_enabled = false
|
||||||
|
else
|
||||||
|
renderer.status("usage: :highlight [on|off|status]")
|
||||||
|
return
|
||||||
|
end
|
||||||
|
renderer.set_highlight(highlight_enabled, highlight_detected, highlighted)
|
||||||
|
if highlight_enabled and not highlight_detected then
|
||||||
|
-- B4: install hint when toggled on but CLI absent. Also note
|
||||||
|
-- the parser-directory + grammar-clone requirement that
|
||||||
|
-- catches users who installed only the CLI.
|
||||||
|
renderer.status("highlight on but tree-sitter CLI not found.")
|
||||||
|
renderer.status("install: `apt install tree-sitter-cli` OR `cargo install tree-sitter-cli`")
|
||||||
|
renderer.status("then: `tree-sitter init-config` AND clone the relevant")
|
||||||
|
renderer.status("`tree-sitter-<lang>` grammars into a parser directory.")
|
||||||
|
elseif highlight_enabled then
|
||||||
|
renderer.status("highlight on (note: needs parser-directories with built tree-sitter-<lang> grammars)")
|
||||||
|
else
|
||||||
|
renderer.status("highlight off")
|
||||||
|
end
|
||||||
|
end
|
||||||
-- Phase 6: :diff meta — `git diff <args>` (B1-clean), appends as
|
-- Phase 6: :diff meta — `git diff <args>` (B1-clean), appends as
|
||||||
-- [diff <args>]\n<output> exec_output. Reads cwd at invocation
|
-- [diff <args>]\n<output> exec_output. Reads cwd at invocation
|
||||||
-- time (R6: differs from :tree's scan-time cwd capture). Empty
|
-- time (R6: differs from :tree's scan-time cwd capture). Empty
|
||||||
|
|||||||
Reference in New Issue
Block a user