Files
lmcp/server.lua
test0r c5375b8a77 v1.2.1/#22: LMCP_HOST + LMCP_CONF env support
Adds two env vars to the packaged server.lua so hosts can switch
fully to the packaged entrypoint (combined with v1.2.0's tools.d/
plugin scan):

  LMCP_HOST — interface to bind on (default 0.0.0.0). Hosts that
              need .18-only binding (hertz) or similar single-NIC
              constraints set this. Threaded into lmcp.new opts.host.
  LMCP_CONF — path to a conf file with bearer-token entries (e.g.
              /opt/herding/etc/hertz-tools.conf). Read by lmcp.lua's
              read_conf; the `.godparticle` entry becomes the bearer
              token. Threaded into lmcp.new opts.conf.

Both unset → unchanged behavior (binds 0.0.0.0, no conf file).

Together with v1.2.0's tools.d/ scan, this lets a host like hertz
ship NO override server.lua — just an /opt/lmcp/tools.d/hertz.lua
plugin file and a systemd unit that points at the packaged
server.lua with LMCP_HOST=192.168.88.18 + LMCP_CONF=/opt/herding/
etc/hertz-tools.conf. apt upgrade then delivers all packaged
improvements automatically.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-17 23:33:30 +00:00

1127 lines
44 KiB
Lua

#!/usr/bin/env lua
-- lmcp server — cross-platform shell tools
-- Works on Linux, macOS, and Windows without modification.
-- SPDX-License-Identifier: MIT
-- Make modules that sit next to this script loadable no matter which
-- directory the interpreter was started from.
local dir = arg[0]:match('(.*[/\\])') or './'
local sep = package.config:sub(1, 1) -- directory separator: '/' on Unix, '\\' on Windows
package.path = table.concat({ package.path, dir .. '?.lua' }, ';')
-- ---- Platform detection ----
local WINDOWS = (sep == '\\')
local function is_windows()
    return WINDOWS
end
-- Windows: also search the lua\ subdirectory tree for the LuaSocket DLLs.
if WINDOWS then
    local dll_patterns = { 'lua\\?.dll', 'lua\\socket\\?.dll', 'lua\\mime\\?.dll' }
    for _, pattern in ipairs(dll_patterns) do
        package.cpath = package.cpath .. ';' .. dir .. pattern
    end
end
local lmcp = require('lmcp')
-- ---- Non-blocking command execution with timeout ----
-- io.popen blocks until the child exits. On any OS, a long-running
-- process (like a daemon) will hang lmcp forever. We work around this
-- by spawning into temp files and polling a sentinel.
--- Produce a unique base path for scratch files.
-- Callers in this file only ever append a suffix (".out", ".done", ".body",
-- ".json", …) to the result; the bare path itself is never opened.
-- @treturn string base path without extension
local function tmpname()
    if WINDOWS then
        local tmp = os.getenv("TEMP") or "C:\\Windows\\Temp"
        return tmp .. "\\lmcp_" .. os.time() .. "_" .. math.random(10000, 99999)
    end
    local name = os.tmpname()
    -- On POSIX builds os.tmpname() may create the file as a placeholder.
    -- Nothing here uses the bare path, so delete it right away; otherwise
    -- every call leaves one empty file behind. The result of os.remove is
    -- ignored on purpose: on builds that do not create the file it simply
    -- reports "no such file".
    os.remove(name)
    return name
end
-- luasocket is loaded on first use instead of at file load, so merely
-- loading this file does not pull it in. The module handle is cached in
-- _socket after the first call.
local _socket = nil
--- Current time as reported by luasocket's gettime().
local function gettime()
    _socket = _socket or require("socket")
    return _socket.gettime()
end
-- Cached handle to the lmcp module, used for the cross-module ctx lookup
-- (issue #11). Three states:
--   nil   → lookup not attempted yet
--   false → require("lmcp") failed or yielded a falsy value; never retried
--   other → the value require("lmcp") returned
local _lmcp_mod = nil
--- Return the ctx reported by lmcp.current_ctx(), or nil.
-- nil is returned when the module cannot be loaded, when it has no
-- current_ctx field, or when current_ctx() itself returns a falsy value;
-- run() then behaves as non-cancellable.
local function current_ctx()
    if _lmcp_mod == nil then
        local loaded, mod = pcall(require, "lmcp")
        if loaded and mod then
            _lmcp_mod = mod
        else
            _lmcp_mod = false
        end
    end
    if _lmcp_mod == false then
        return nil
    end
    local lookup = _lmcp_mod.current_ctx
    if not lookup then
        return nil
    end
    return lookup() or nil
end
--- True when called from inside a coroutine rather than the main thread
-- (issue #20).
-- coroutine.running() differs between versions: Lua 5.4 returns
-- (co, is_main) everywhere, while Lua 5.1 / LuaJIT return nil on the main
-- thread. Both shapes collapse into the single expression below.
-- @treturn boolean
local function in_coroutine()
    local co, is_main = coroutine.running()
    return co ~= nil and not is_main
end
--- Pause for roughly `ms` milliseconds.
-- Inside a coroutine this yields a table { wake_at = <deadline> } and
-- returns once resumed (issue #20), so tools that go through run() do not
-- block the caller's loop. On the main thread it blocks:
--   Windows, < 500 ms : busy-wait on os.clock()
--   Windows, otherwise: `ping -n <secs+1> 127.0.0.1` (about 1 s per count)
--   POSIX,   < 1000 ms: `sleep 0.NNN` (fractional seconds)
--   POSIX,   otherwise: `sleep <secs>`, rounded up to whole seconds
-- @tparam number ms duration in milliseconds
local function sleep_ms(ms)
    if in_coroutine() then
        coroutine.yield({ wake_at = gettime() + (ms / 1000) })
        return
    end
    local shell_cmd
    if WINDOWS then
        if ms < 500 then
            local deadline = os.clock() + ms / 1000
            repeat until os.clock() >= deadline
            return
        end
        local secs = math.ceil(ms / 1000)
        shell_cmd = "ping -n " .. (secs + 1) .. " 127.0.0.1 >nul 2>&1"
    elseif ms < 1000 then
        shell_cmd = "sleep 0." .. string.format("%03d", ms)
    else
        shell_cmd = "sleep " .. math.ceil(ms / 1000)
    end
    os.execute(shell_cmd)
end
--- Report whether `path` can be opened for reading.
-- @tparam string path
-- @treturn boolean
local function file_exists(path)
    local handle = io.open(path, 'r')
    if not handle then
        return false
    end
    handle:close()
    return true
end
--- Read a whole file opened in mode 'r'.
-- @tparam string path
-- @treturn ?string file contents, or nil when the file cannot be opened
local function read_file(path)
    local handle = io.open(path, 'r')
    if not handle then
        return nil
    end
    local contents = handle:read('*a')
    handle:close()
    return contents
end
--- Delete `path`, ignoring the outcome (a missing file is not an error).
-- Returns nothing.
local function remove_silent(path)
    local _removed, _reason = os.remove(path) -- result intentionally discarded
end
--- Run a shell command in the background and wait for it, up to a limit.
-- The command's stdout and stderr are redirected into a scratch file; a
-- second "done" file is written by the wrapper once the command exits and
-- is what the polling loop watches for.
--
-- NOTE: on timeout or cancellation the child is not killed; only the
-- scratch files are removed and control returns to the caller.
--
-- @tparam string cmd command line (run via cmd.exe on Windows, sh elsewhere)
-- @tparam[opt=120] number timeout_sec wall-clock seconds to wait
-- @treturn string one of:
--   the captured output;
--   "(no output)" when the command finished without printing anything;
--   "(cancelled)" when the current ctx reported cancellation;
--   on timeout, the partial output if there is any, otherwise
--   "Error: command timed out after <n>s";
--   "Error: could not create temp file" (Windows wrapper not writable).
local function run(cmd, timeout_sec)
    timeout_sec = timeout_sec or 120
    local base = tmpname()
    local out_file = base .. ".out"
    local done_file = base .. ".done"
    -- Wall-clock deadline rather than an accumulated interval counter:
    -- inside a dispatch coroutine (issue #20) the scheduler may resume us
    -- later than `interval`, so an accumulator would drift from real
    -- elapsed time. Comparing gettime() stays honest in both modes.
    --
    -- Auto-cancellation (issue #11): if a ctx is available and reports
    -- cancelled, leave the loop early. With a ctx present the interval is
    -- capped at 500 ms so cancel latency stays around half a second.
    local started = gettime()
    local cancelled = false
    local function poll_loop()
        local interval = WINDOWS and 100 or 50 -- ms
        while gettime() - started < timeout_sec do
            if file_exists(done_file) then return true end
            local ctx = current_ctx()
            if ctx and ctx.cancelled and ctx.cancelled() then
                cancelled = true
                return false
            end
            sleep_ms(interval)
            -- Back off by 1.5x per round until the interval reaches 2 s.
            if interval < 2000 then interval = math.floor(interval * 1.5) end
            if ctx and interval > 500 then interval = 500 end
        end
        return false
    end

    -- Files to delete once polling is over.
    local scratch = { out_file, done_file }
    if WINDOWS then
        -- Batch wrapper: run the command, then signal completion.
        local bat_file = base .. ".bat"
        local bf = io.open(bat_file, 'w')
        if not bf then return "Error: could not create temp file" end
        bf:write("@echo off\r\n")
        bf:write(cmd .. ' > "' .. out_file .. '" 2>&1\r\n')
        bf:write('echo %ERRORLEVEL% > "' .. done_file .. '"\r\n')
        bf:close()
        scratch[#scratch + 1] = bat_file
        os.execute('start /B cmd /C "' .. bat_file .. '"')
    else
        -- POSIX: background a subshell that runs the command and then
        -- writes the exit status into the done file.
        local sh_cmd = string.format(
            "(%s) > '%s' 2>&1; echo $? > '%s'",
            cmd, out_file, done_file
        )
        os.execute("sh -c '" .. sh_cmd:gsub("'", "'\\''") .. "' &")
    end

    local completed = poll_loop()
    local output = read_file(out_file)
    for _, path in ipairs(scratch) do
        remove_silent(path)
    end

    local has_output = output ~= nil and output ~= ""
    if completed then
        return has_output and output or "(no output)"
    end
    if cancelled then return "(cancelled)" end
    -- Timed out. An empty string is truthy in Lua, so `output or <error>`
    -- would hand back "" and hide the timeout; test for content explicitly.
    if has_output then return output end
    return "Error: command timed out after " .. timeout_sec .. "s"
end
-- ---- Server setup ----
--- Read an environment variable, treating an empty value the same as unset
-- (e.g. a service unit that declares `LMCP_HOST=` with nothing after it).
local function env_nonempty(name)
    local value = os.getenv(name)
    if value == nil or value == "" then return nil end
    return value
end
local server_name = env_nonempty("LMCP_NAME") or (WINDOWS and "windows-tools" or "linux-tools")
local server = lmcp.new(server_name, {
    -- Port precedence: numeric LMCP_PORT, then numeric arg[1], then 8080.
    -- An unparsable LMCP_PORT no longer hides a valid arg[1].
    port = tonumber(env_nonempty("LMCP_PORT") or "") or tonumber(arg[1] or "") or 8080,
    -- LMCP_HOST: bind interface. Left nil when unset so lmcp.new applies
    -- its own default. Hosts that need single-interface binding
    -- (hertz: 192.168.88.18 only) set this.
    host = env_nonempty("LMCP_HOST"),
    -- LMCP_CONF: path to a conf file with bearer-token entries
    -- (e.g. /opt/herding/etc/hertz-tools.conf). Passed through as opts.conf.
    conf = env_nonempty("LMCP_CONF"),
})
-- ---- Tools ----
-- Tool "shell": run a command through run() and return its output.
--   command    (required) command line to execute
--   cwd        directory to change into first; empty string is ignored
--   timeout    seconds passed to run() (default 120)
--   powershell Windows only: run `command` through PowerShell
server:tool("shell", "Execute a shell command.", {
    type = "object",
    properties = {
        command = { type = "string", description = "Command to execute" },
        cwd = { type = "string", description = "Working directory" },
        timeout = { type = "integer", description = "Timeout in seconds", default = 120 },
        powershell = { type = "boolean", description = "Use PowerShell (Windows only)", default = false },
    },
    required = { "command" },
}, function(a)
    local cmd = a.command
    -- Wrap for PowerShell BEFORE adding the cd prefix. run() executes the
    -- line with cmd.exe, so `cd /d … &&` must stay outside the -Command
    -- string; inside it, PowerShell would be handed cmd.exe syntax.
    if a.powershell and WINDOWS then
        cmd = 'powershell -NoProfile -Command "' .. cmd:gsub('"', '\\"') .. '"'
    end
    -- Empty cwd is treated as "not given", matching shell_bg.
    if a.cwd and a.cwd ~= "" then
        if WINDOWS then
            cmd = 'cd /d "' .. a.cwd .. '" && ' .. cmd
        else
            cmd = 'cd "' .. a.cwd .. '" && ' .. cmd
        end
    end
    return run(cmd, a.timeout or 120)
end, {
    annotations = {
        title = "Run shell",
        readOnlyHint = false,
        destructiveHint = true,
        idempotentHint = false,
        openWorldHint = true,
    },
})
-- Tool "shell_bg": launch a detached command and return at once.
-- The command runs under `setsid nohup sh -c …` with stdin from /dev/null
-- and stdout+stderr appended to nothing but the log file. The PID printed
-- by the launching shell ($!) is read back from "<log>.pid", which is
-- deleted afterwards. Returns "launched pid=<pid|?> log=<path>".
server:tool("shell_bg",
    "Fire-and-forget shell command (Linux-only). Fully detaches via setsid+nohup+stdio-redirect and returns immediately with PID and log path. Use for daemons that must outlive the lmcp request.",
    {
        type = "object",
        properties = {
            command = { type = "string", description = "Shell command to launch" },
            cwd = { type = "string", description = "Working directory" },
            log = { type = "string", description = "Log file (stdout+stderr). Default: /tmp/lmcp-bg-<ts>-<rand>.log" },
        },
        required = { "command" },
    },
    function(a)
        if WINDOWS then
            return "Error: shell_bg is Linux-only (Windows Start-Process equivalent TBD)"
        end
        local command = a.command
        if type(command) ~= "string" or command == "" then
            return "Error: command required"
        end

        -- Wrap a string in single quotes for sh, escaping embedded quotes.
        local function shell_quote(s)
            return "'" .. s:gsub("'", "'\\''") .. "'"
        end

        local log_path = a.log
        if not log_path or log_path == "" then
            log_path = string.format("/tmp/lmcp-bg-%d-%d.log", os.time(), math.random(1000, 9999))
        end
        local pid_path = log_path .. ".pid"

        local script = command
        if a.cwd and a.cwd ~= "" then
            script = "cd " .. shell_quote(a.cwd) .. " && " .. script
        end

        os.execute(string.format(
            "setsid nohup sh -c %s </dev/null >%s 2>&1 & echo $! > %s",
            shell_quote(script), shell_quote(log_path), shell_quote(pid_path)
        ))

        local pid = "?"
        local pid_handle = io.open(pid_path, 'r')
        if pid_handle then
            pid = (pid_handle:read('*a') or ""):match("(%d+)") or "?"
            pid_handle:close()
            os.remove(pid_path)
        end
        return string.format("launched pid=%s log=%s", pid, log_path)
    end, {
        annotations = {
            title = "Run shell (background)",
            readOnlyHint = false,
            destructiveHint = true,
            idempotentHint = false,
            openWorldHint = true,
        },
    })
-- Tool "read_file": return the contents of `path`, or an
-- "Error: could not read <path>" string when it cannot be opened.
server:tool("read_file", "Read a file.", {
    type = "object",
    properties = {
        path = { type = "string" },
    },
    required = { "path" },
}, function(a)
    local contents = read_file(a.path)
    if contents then
        return contents
    end
    return "Error: could not read " .. a.path
end, {
    annotations = {
        title = "Read file",
        readOnlyHint = true,
        destructiveHint = false,
        idempotentHint = true,
        openWorldHint = false,
    },
})
-- Tool "write_file": replace the contents of `path` with `content`.
-- Returns "Written <n> bytes to <path>" on success, otherwise a string
-- starting with "Error:".
server:tool("write_file", "Write content to a file.", {
    type = "object",
    properties = {
        path = { type = "string" },
        content = { type = "string" },
    },
    required = { "path", "content" },
}, function(a)
    -- Same argument checks edit_file performs; without them a missing or
    -- non-string argument raises instead of producing an error result.
    if type(a.path) ~= "string" or a.path == "" then return "Error: path required" end
    if type(a.content) ~= "string" then return "Error: content required" end
    local f = io.open(a.path, 'w')
    if not f then return "Error: could not write " .. a.path end
    -- write() and close() both report failure via nil + message (e.g. a
    -- full disk). Check them so a failed write is not reported as success.
    local wrote, write_err = f:write(a.content)
    local closed, close_err = f:close()
    if not wrote then
        return "Error: could not write " .. a.path .. ": " .. tostring(write_err)
    end
    if not closed then
        return "Error: could not write " .. a.path .. ": " .. tostring(close_err)
    end
    return string.format("Written %d bytes to %s", #a.content, a.path)
end, {
    annotations = {
        title = "Write file",
        readOnlyHint = false,
        destructiveHint = true,
        idempotentHint = true,
        openWorldHint = false,
    },
})
-- Tool "edit_file": literal (non-pattern) search-and-replace in a file.
-- Without replace_all the edit is refused unless old_string occurs exactly
-- once. Returns "Edited <path>: <n> replacement(s)" on success, otherwise
-- a string starting with "Error:".
server:tool("edit_file", "Replace exact text in a file (literal match). Fails unless old_string is unique, unless replace_all=true.", {
    type = "object",
    properties = {
        path = { type = "string", description = "Path to file" },
        old_string = { type = "string", description = "Exact text to replace (literal, no regex)" },
        new_string = { type = "string", description = "Replacement text" },
        replace_all = { type = "boolean", description = "Replace every occurrence (default: false)", default = false },
    },
    required = { "path", "old_string", "new_string" },
}, function(a)
    if type(a.path) ~= "string" or a.path == "" then return "Error: path required" end
    if type(a.old_string) ~= "string" then return "Error: old_string required" end
    if type(a.new_string) ~= "string" then return "Error: new_string required" end
    if a.old_string == "" then return "Error: old_string cannot be empty" end
    if a.old_string == a.new_string then return "Error: new_string must differ from old_string" end

    local f = io.open(a.path, 'rb')
    if not f then return "Error: could not read " .. a.path end
    local content = f:read('*a')
    f:close()
    if not content then return "Error: could not read " .. a.path end

    -- One plain-text scan records where each non-overlapping match starts.
    local needle_len = #a.old_string
    local starts, pos = {}, 1
    while true do
        local i = content:find(a.old_string, pos, true)
        if not i then break end
        starts[#starts + 1] = i
        pos = i + needle_len
    end

    local count = #starts
    if count == 0 then
        return "Error: old_string not found in " .. a.path
    end
    if count > 1 and not a.replace_all then
        return string.format("Error: old_string matches %d times in %s (use replace_all=true or provide more surrounding context to disambiguate)", count, a.path)
    end

    -- Stitch the result: text before each match, the replacement, then the
    -- tail. Without replace_all there is exactly one match at this point.
    local replaced = a.replace_all and count or 1
    local parts, cursor = {}, 1
    for k = 1, replaced do
        parts[#parts + 1] = content:sub(cursor, starts[k] - 1)
        parts[#parts + 1] = a.new_string
        cursor = starts[k] + needle_len
    end
    parts[#parts + 1] = content:sub(cursor)

    local w = io.open(a.path, 'wb')
    if not w then return "Error: could not write " .. a.path end
    -- Check write/close: opening with 'wb' has already truncated the file,
    -- so a failed write must not be reported as a successful edit.
    local wrote, write_err = w:write(table.concat(parts))
    local closed, close_err = w:close()
    if not wrote then
        return "Error: could not write " .. a.path .. ": " .. tostring(write_err)
    end
    if not closed then
        return "Error: could not write " .. a.path .. ": " .. tostring(close_err)
    end
    return string.format("Edited %s: %d replacement(s)", a.path, replaced)
end, {
    annotations = {
        title = "Edit file",
        readOnlyHint = false,
        destructiveHint = true,
        idempotentHint = false,
        openWorldHint = false,
    },
})
-- Tool "list_dir": one entry name per line, via `dir /b` on Windows and
-- `ls -1` elsewhere, with a 10 second limit. A missing, empty or
-- non-string path means the current directory.
server:tool("list_dir", "List directory contents.", {
    type = "object",
    properties = { path = { type = "string", default = "." } },
}, function(a)
    local path = a.path
    if type(path) ~= "string" or path == "" then path = "." end
    if WINDOWS then
        return run('dir /b "' .. path .. '"', 10)
    end
    -- `--` ends option parsing, so a path that starts with "-" is listed
    -- as a path instead of being read as ls options.
    return run("ls -1 -- '" .. path:gsub("'", "'\\''") .. "'", 10)
end, {
    annotations = {
        title = "List directory",
        readOnlyHint = true,
        destructiveHint = false,
        idempotentHint = true,
        openWorldHint = false,
    },
})
-- ---- fetch: HTTP GET/HEAD with bounded body and optional HTML→plain rendering ----
--
-- Contract (per Phase 4 plan, issue #3):
-- 1. Transfer cap is enforced by curl --max-filesize, not by post-hoc
-- slicing. curl aborts mid-stream with exit 63 and the body file
-- holds up-to-N bytes (verified Phase 0).
-- 2. Curl exit code is recovered via -w "exit=%{exitcode}\n" because
-- run() returns only the captured output text, not the exit status.
-- Parsing is line-anchored because run()'s 2>&1 merges curl's stderr
-- into the same stream.
-- 3. ok = (exit == 0 or exit == 63). exit 63 is a deliberate
-- truncation, not a failure — set truncated=true and ok=true.
-- 4. URL whitelist (RFC-3986-ish) rejects whitespace, control chars,
-- both quote styles in one shot — no per-platform branching.
-- 5. Renderer chain (plain, text/html only): pandoc → lynx → w3m →
-- pure-Lua strip. Probe results are process-local cached.
-- 6. os.execute return shape differs between Lua 5.1/LuaJIT (number)
-- and Lua 5.4 (boolean,...). fetch_have normalises both.
-- 7. timeout_s covers fetch *and* render combined.
--- Encode one Unicode code point as UTF-8.
-- Hand-rolled so it also works on Lua 5.1 / LuaJIT, which lack utf8.char.
-- @tparam number cp
-- @treturn ?string nil when cp is not an integer scalar value
--   (negative, above U+10FFFF, fractional, or a UTF-16 surrogate)
local function fetch_utf8_char(cp)
    if cp < 0 or cp > 0x10FFFF or cp ~= math.floor(cp)
        or (cp >= 0xD800 and cp <= 0xDFFF) then
        return nil
    end
    if cp < 0x80 then
        return string.char(cp)
    end
    if cp < 0x800 then
        return string.char(0xC0 + math.floor(cp / 0x40),
                           0x80 + cp % 0x40)
    end
    if cp < 0x10000 then
        return string.char(0xE0 + math.floor(cp / 0x1000),
                           0x80 + math.floor(cp / 0x40) % 0x40,
                           0x80 + cp % 0x40)
    end
    return string.char(0xF0 + math.floor(cp / 0x40000),
                       0x80 + math.floor(cp / 0x1000) % 0x40,
                       0x80 + math.floor(cp / 0x40) % 0x40,
                       0x80 + cp % 0x40)
end

--- Pure-Lua HTML → text fallback.
-- Drops <script>/<style> elements, comments and all remaining tags,
-- decodes a small set of named entities plus numeric ones, then collapses
-- whitespace runs and trims both ends.
-- @tparam ?string s HTML text
-- @treturn string plain text ("" for nil or empty input)
local function fetch_html_strip(s)
    if not s or s == "" then return "" end
    s = s:gsub("<script.->.-</script>", " ")
    s = s:gsub("<style.->.-</style>", " ")
    s = s:gsub("<!%-%-.-%-%->", " ")
    s = s:gsub("<[^>]+>", " ")
    local ents = { amp = "&", lt = "<", gt = ">", quot = '"', apos = "'", nbsp = " " }
    -- Decode every entity in ONE pass so that text produced by a
    -- replacement is never decoded again ("&amp;#65;" must become "&#65;",
    -- not "A"). Returning nil from the callback keeps the original text,
    -- which is how unknown or invalid entities are left untouched.
    s = s:gsub("&(#?)(%w+);", function(hash, name)
        if hash == "" then
            return ents[name]
        end
        local cp
        if name:match("^%d+$") then
            cp = tonumber(name)
        elseif name:match("^[xX]%x+$") then
            cp = tonumber(name:sub(2), 16)
        end
        if not cp then return nil end
        if cp == 160 then return " " end -- numeric nbsp, same as &nbsp;
        -- string.char(cp) alone raises for cp > 255 (e.g. &#8217;), so
        -- encode as UTF-8 instead.
        return fetch_utf8_char(cp)
    end)
    s = s:gsub("%s+", " "):gsub("^%s+", ""):gsub("%s+$", "")
    return s
end
-- Per-process memo of probe results: command name → boolean.
local _fetch_have_cache = {}
--- Is `cmd` available on PATH?
-- Probes with `where` on Windows and `command -v` elsewhere, and remembers
-- the answer. os.execute returns a number on Lua 5.1/LuaJIT and a boolean
-- (or nil) on later versions; both are folded into a strict boolean.
-- @tparam string cmd bare command name
-- @treturn boolean
local function fetch_have(cmd)
    local known = _fetch_have_cache[cmd]
    if known ~= nil then
        return known
    end
    local probe = WINDOWS
        and ("where " .. cmd .. " >NUL 2>&1")
        or ("command -v " .. cmd .. " >/dev/null 2>&1")
    local status = os.execute(probe)
    local found
    if type(status) == "number" then
        found = (status == 0)
    else
        found = not not status
    end
    _fetch_have_cache[cmd] = found
    return found
end
--- Validate a URL before it is placed on a curl command line.
-- Accepts only http/https URLs made entirely of the whitelisted
-- characters below; whitespace, quotes and control characters are not in
-- the set and therefore rejected.
-- @param url candidate URL
-- @treturn boolean true when acceptable
-- @treturn ?string reason, only when the first result is false
local function fetch_safe_url(url)
    local scheme_pat = "^https?://"
    local whole_pat = "^https?://[%w%-._~:/?#%[%]@!%$&()*+,;=%%]+$"
    if type(url) ~= "string" or url == "" then
        return false, "url required"
    elseif url:match(scheme_pat) == nil then
        return false, "url scheme must be http or https"
    elseif url:match(whole_pat) == nil then
        return false, "url contains disallowed characters (whitespace, quote, control)"
    end
    return true
end
--- Pick the curl -w fields out of run()'s merged output.
-- Only lines of the exact form "<key>=<value>" are used, and only for the
-- four keys below; any other line is ignored. A later line for the same
-- key overrides an earlier one.
-- @tparam string blob
-- @treturn table subset of { http_code, content_type, size_download, exit },
--   all values as strings
local function fetch_parse_kv(blob)
    local wanted = {
        http_code = true,
        content_type = true,
        size_download = true,
        exit = true,
    }
    local fields = {}
    for line in blob:gmatch("[^\r\n]+") do
        local key, value = line:match("^([%w_]+)=(.*)$")
        if key and wanted[key] then
            fields[key] = value
        end
    end
    return fields
end
--- Render HTML to plain text with the first external renderer that works.
-- Renderers are tried in the order pandoc, lynx, w3m; each reads
-- body_file on stdin with a 15 second limit. A renderer is skipped when it
-- is not installed or when its output is empty or starts with "Error:".
-- When none succeeds, the pure-Lua stripper is applied to `body`.
-- @tparam string body HTML text (used only by the fallback)
-- @tparam string body_file path of a file holding the same HTML
-- @treturn string rendered text
-- @treturn string renderer used: "pandoc", "lynx", "w3m" or "lua-strip"
local function fetch_render_plain(body, body_file)
    local renderers = {
        { "pandoc", "pandoc -f html -t plain" },
        { "lynx", "lynx -stdin -dump -nolist -force_html" },
        { "w3m", "w3m -dump -T text/html" },
    }
    for _, entry in ipairs(renderers) do
        local name, cmdline = entry[1], entry[2]
        if fetch_have(name) then
            local stdin_redirect
            if WINDOWS then
                stdin_redirect = ' < "' .. body_file .. '"'
            else
                stdin_redirect = " < '" .. body_file:gsub("'", "'\\''") .. "'"
            end
            local rendered = run(cmdline .. stdin_redirect, 15)
            if rendered and rendered ~= "" and not rendered:match("^Error:") then
                return rendered, name
            end
        end
    end
    return fetch_html_strip(body), "lua-strip"
end
-- Tool "fetch": HTTP GET/HEAD through curl with a byte cap and optional
-- HTML → text rendering.
-- Result table: { ok, status, content_type, bytes_read, truncated,
--                 renderer, body, error? }.
-- curl exit 63 (--max-filesize hit) is reported as ok=true, truncated=true.
server:tool("fetch",
    "HTTP GET/HEAD with bounded body and optional HTML→plain rendering. " ..
    "timeout_s covers the entire fetch+render combined.",
    {
        type = "object",
        properties = {
            url = { type = "string", description = "http(s) URL" },
            method = { type = "string", description = "GET or HEAD", default = "GET" },
            render = { type = "string", description = "plain | html | raw", default = "plain" },
            max_bytes = { type = "integer", description = "Hard cap on body bytes returned", default = 65536 },
            timeout_s = { type = "integer", description = "Wall-clock cap for entire call", default = 20 },
            user_agent = { type = "string", description = "Custom User-Agent", default = "lmcp-fetch/1.0" },
        },
        required = { "url" },
    },
    function(a)
        -- Uniform failure envelope.
        local function fail(msg, ctype)
            return { ok = false, status = 0, content_type = ctype or "", bytes_read = 0,
                     truncated = false, renderer = "raw", body = "", error = msg }
        end

        local ok_url, url_err = fetch_safe_url(a.url)
        if not ok_url then return fail(url_err) end

        local method = a.method or "GET"
        if type(method) ~= "string" then return fail("method must be GET or HEAD") end
        method = method:upper()
        if method ~= "GET" and method ~= "HEAD" then
            return fail("method must be GET or HEAD")
        end

        local render = a.render or "plain"
        -- Both numbers are formatted with %d below, which raises on
        -- fractional values under Lua 5.3+, so force them to integers.
        -- timeout_s is kept at 1 or more: curl treats --max-time 0 as
        -- "no limit".
        local max_bytes = math.floor(tonumber(a.max_bytes) or 65536)
        local timeout_s = math.max(1, math.ceil(tonumber(a.timeout_s) or 20))

        -- The User-Agent is caller-controlled text that ends up inside a
        -- quoted shell argument. Refuse double quotes and control
        -- characters outright; single quotes are escaped for sh below.
        local ua = a.user_agent or "lmcp-fetch/1.0"
        if type(ua) ~= "string" or ua:find('[%c"]') then
            return fail("user_agent contains disallowed characters (quote, control)")
        end

        local base = tmpname()
        local hdr_file = base .. ".hdr"
        local body_file = base .. ".body"
        local wfmt = "http_code=%{http_code}\\ncontent_type=%{content_type}\\nsize_download=%{size_download}\\nexit=%{exitcode}\\n"
        local head_flag = (method == "HEAD") and " -I" or ""
        local curl_cmd
        if WINDOWS then
            curl_cmd = string.format(
                'curl -sS --proto =http,https%s -X %s --max-time %d --max-filesize %d -A "%s" -D "%s" -o "%s" -w "%s" "%s"',
                head_flag, method, timeout_s, max_bytes, ua, hdr_file, body_file, wfmt, a.url
            )
            -- run() executes this line from a .bat file, where a single %
            -- starts variable/argument expansion. Double every % so the -w
            -- format and percent-encoded URLs reach curl unchanged.
            curl_cmd = curl_cmd:gsub("%%", "%%%%")
        else
            local ua_quoted = ua:gsub("'", "'\\''")
            curl_cmd = string.format(
                "curl -sS --proto =http,https%s -X %s --max-time %d --max-filesize %d -A '%s' -D '%s' -o '%s' -w '%s' '%s'",
                head_flag, method, timeout_s, max_bytes, ua_quoted, hdr_file, body_file, wfmt, a.url
            )
        end

        local raw_out = run(curl_cmd, timeout_s + 5) or ""
        local kv = fetch_parse_kv(raw_out)
        local exit = tonumber(kv.exit or "") or -1
        local http_code = tonumber(kv.http_code or "0") or 0
        local content_type = kv.content_type or ""
        local body = ""
        if method ~= "HEAD" then
            local bf = io.open(body_file, 'rb')
            if bf then body = bf:read('*a') or ""; bf:close() end
        end
        remove_silent(hdr_file)
        remove_silent(body_file)
        -- Defensive cap (curl already capped, but enforce on the wire).
        if #body > max_bytes then body = body:sub(1, max_bytes) end
        local bytes_read = #body
        local truncated = (exit == 63)
        local transport_ok = (exit == 0 or exit == 63)
        if not transport_ok then
            -- Strip the -w block from raw_out for a clean error message.
            local err_msg = raw_out:gsub("http_code=[^\n]*\n?", "")
                :gsub("content_type=[^\n]*\n?", "")
                :gsub("size_download=[^\n]*\n?", "")
                :gsub("exit=[^\n]*\n?", "")
                :gsub("^%s+", ""):gsub("%s+$", "")
            if err_msg == "" then err_msg = "curl exit " .. tostring(exit) end
            return fail(err_msg, content_type)
        end

        local renderer, out_body
        if render == "raw" or render == "html" or method == "HEAD" then
            renderer, out_body = "raw", body
        elseif render == "plain" then
            local is_html = content_type:match("text/html") or content_type:match("xml")
            if is_html and body ~= "" then
                -- Re-materialise body to a temp file for the renderer pipe.
                local rf = tmpname() .. ".rbody"
                local f = io.open(rf, 'wb')
                if f then
                    f:write(body); f:close()
                    out_body, renderer = fetch_render_plain(body, rf)
                    remove_silent(rf)
                else
                    -- No scratch file means no external renderer: go
                    -- straight to the pure-Lua fallback.
                    out_body, renderer = fetch_html_strip(body), "lua-strip"
                end
            else
                renderer, out_body = "raw", body
            end
        else
            return fail("render must be plain, html, or raw", content_type)
        end
        if #out_body > max_bytes then out_body = out_body:sub(1, max_bytes) end
        return {
            ok = true,
            status = http_code,
            content_type = content_type,
            bytes_read = bytes_read,
            truncated = truncated,
            renderer = renderer,
            body = out_body,
        }
    end, {
        annotations = {
            title = "HTTP GET/HEAD",
            readOnlyHint = true,
            destructiveHint = false,
            -- Idempotent in MCP sense: the tool itself has no effect on
            -- its own environment. World-side variability is conveyed
            -- by openWorldHint.
            idempotentHint = true,
            openWorldHint = true,
        },
    })
-- Tool "search_files": recursive file-name search with a 30 second limit.
-- Uses `dir /b /s` on Windows and `find -L … -name …` elsewhere (errors
-- from find are discarded). The search root defaults to C:\ or /.
server:tool("search_files", "Search for files by pattern.", {
    type = "object",
    properties = {
        pattern = { type = "string", description = "File name pattern" },
        path = { type = "string", default = WINDOWS and "C:\\" or "/" },
    },
    required = { "pattern" },
}, function(a)
    local root = a.path or (WINDOWS and "C:\\" or "/")
    local command
    if WINDOWS then
        command = 'dir /b /s "' .. root .. '\\' .. a.pattern .. '"'
    else
        -- Single-quote a string for sh, escaping embedded single quotes.
        local function shell_quote(s)
            return "'" .. s:gsub("'", "'\\''") .. "'"
        end
        -- -L: follow symlinks on the start path. macOS BSD find otherwise
        -- silently emits nothing when the start path is itself a symlink
        -- (common on Homebrew, e.g. /usr/local/share/lua -> Cellar/…/share/lua).
        command = "find -L " .. shell_quote(root)
            .. " -name " .. shell_quote(a.pattern)
            .. " 2>/dev/null"
    end
    return run(command, 30)
end, {
    annotations = {
        title = "Find files by pattern",
        readOnlyHint = true,
        destructiveHint = false,
        idempotentHint = true,
        openWorldHint = false,
    },
})
-- ---- web_search: pluggable-backend search with normalised result shape ----
--
-- Contract (per Phase 4 plan + Phase 5 review actions, issue #4):
-- 1. Backend selection: explicit LMCP_SEARCH_BACKEND (lower+trim) wins;
-- else first-present of SEARXNG_URL, TAVILY_API_KEY, BRAVE_API_KEY;
-- else "ddg" zero-config.
-- 2. Result envelope is always:
-- { ok, backend, query, results=[{title,url,snippet,age?}], error? }
-- On failure: ok=false, results=[], error=string.
-- 3. DDG is best-effort. The HTML endpoint serves anti-bot 202 pages
-- from many IP ranges; when the parser matches 0 results from a
-- 200/202, surface a structured "parser found 0" error rather
-- than a silent empty list.
-- 4. DDG parser iterates per-result-block, not per-class globally —
-- otherwise a missing snippet shifts later snippets onto wrong titles.
-- 5. DDG result URLs are unwrapped from /l/?uddg=<URLENCODED>. If
-- unwrap fails (no uddg= or non-http(s) result), the row is dropped.
-- 6. JSON backends (searxng/tavily/brave) use json.decode under pcall.
-- json.lua patched in this issue to combine UTF-16 surrogate pairs
-- so emoji/non-BMP CJK in snippets render correctly.
-- 7. Tavily uses Authorization: Bearer <key> header, not body, so the
-- key never lands in a tempfile.
-- 8. URL query strings are RFC-3986 unreserved-only encoded. After
-- encoding, the only attacker-controlled portion is shell-safe
-- inside single quotes.
--- Percent-encode every byte that is not an RFC 3986 unreserved character
-- (letters, digits, "-", ".", "_", "~"). Hex digits are upper case.
-- @tparam string s
-- @treturn string
local function ws_url_encode(s)
    local encoded = s:gsub("[^%w%-._~]", function(ch)
        return ("%%%02X"):format(ch:byte())
    end)
    return encoded
end
--- Decode %XX escapes into raw bytes. Anything else, including "+" and a
-- "%" not followed by two hex digits, is left unchanged.
-- @tparam string s
-- @treturn string
local function ws_url_decode(s)
    local decoded = s:gsub("%%(%x%x)", function(hex)
        return string.char(tonumber(hex, 16))
    end)
    return decoded
end
--- Recover the real target from a DuckDuckGo redirect link.
-- Expected href shape: //duckduckgo.com/l/?uddg=<URLENC>&rut=<hex>, with
-- "&" possibly written as "&amp;" in the raw HTML.
-- @tparam string href
-- @treturn ?string decoded target URL, or nil when there is no uddg=
--   parameter or the decoded value is not an http(s) URL
local function ws_ddg_unwrap(href)
    local cleaned = href:gsub("&amp;", "&")
    local encoded_target = cleaned:match("[?&]uddg=([^&]+)")
    if encoded_target == nil then
        return nil
    end
    local target = ws_url_decode(encoded_target)
    if target:match("^https?://") then
        return target
    end
    return nil
end
--- Validate a base URL taken from the environment before it is used on a
-- curl command line. Same character whitelist as the fetch tool's URL
-- check, with its own error texts.
-- @param url candidate URL (nil allowed)
-- @treturn boolean true when acceptable
-- @treturn ?string reason, only when the first result is false
local function ws_safe_envurl(url)
    local whole_pat = "^https?://[%w%-._~:/?#%[%]@!%$&()*+,;=%%]+$"
    if not url or url == "" then
        return false, "url empty"
    elseif url:match("^https?://") == nil then
        return false, "url scheme must be http(s)"
    elseif url:match(whole_pat) == nil then
        return false, "url contains disallowed characters"
    end
    return true
end
--- Check that an API key can be embedded in a quoted header argument:
-- non-empty and free of single quotes, double quotes, CR and LF.
-- @param s candidate key (nil allowed)
-- @treturn boolean true when acceptable
-- @treturn ?string reason, only when the first result is false
local function ws_safe_key(s)
    if not s or s == "" then
        return false, "empty"
    end
    local bad_at = s:find("['\"\n\r]")
    if bad_at ~= nil then
        return false, "contains quote or newline"
    end
    return true
end
--- Run a prepared curl command and collect its results.
-- The command is expected to write the response body to body_file and to
-- print "http_code=<n>" and "exit=<n>" via -w; body_file is deleted before
-- returning.
-- @tparam string curl_cmd full command line
-- @tparam string body_file path given to curl's -o
-- @tparam number timeout_s curl's own limit; run() is given 5 s more
-- @treturn string body ("" when the file is missing)
-- @treturn number http_code (0 when not reported)
-- @treturn number exit curl exit code (-1 when not reported)
-- @treturn string raw_out everything run() captured
local function ws_curl_run(curl_cmd, body_file, timeout_s)
    local raw_out = run(curl_cmd, timeout_s + 5) or ""
    local code_text = raw_out:match("http_code=(%d+)") or "0"
    local exit_text = raw_out:match("exit=(%-?%d+)") or "-1"
    local body = ""
    local handle = io.open(body_file, 'rb')
    if handle then
        body = handle:read('*a') or ""
        handle:close()
    end
    remove_silent(body_file)
    return body, tonumber(code_text) or 0, tonumber(exit_text) or -1, raw_out
end
--- Build a readable error string for a failed curl call.
-- Preference order: whatever remains of raw_out once the -w lines are
-- removed; then "HTTP <code>" for a non-zero, non-200 status; then
-- `default`; finally "curl exit <n>".
-- @tparam string raw_out captured curl output
-- @tparam number http_code
-- @tparam number exit curl exit code
-- @tparam ?string default fallback message
-- @treturn string
local function ws_curl_err(raw_out, http_code, exit, default)
    local leftover = raw_out
    for _, key in ipairs({ "http_code", "exit" }) do
        leftover = leftover:gsub(key .. "=[^\n]*\n?", "")
    end
    leftover = leftover:gsub("^%s+", "")
    leftover = leftover:gsub("%s+$", "")
    if leftover ~= "" then
        return leftover
    end
    local http_failed = http_code ~= 0 and http_code ~= 200
    if http_failed then
        return string.format("HTTP %d", http_code)
    end
    return default or ("curl exit " .. tostring(exit))
end
-- ---- DDG (HTML scrape, zero-config) ----
--- Search via DuckDuckGo's HTML endpoint and scrape the result blocks.
-- @tparam string query
-- @tparam number n maximum number of results
-- @tparam ?string region sent as kl= when non-empty
-- @tparam ?string time_range day|week|month|year (anything else: no filter)
-- @tparam ?string safesearch off|moderate|strict (anything else: moderate)
-- @treturn ?table list of { title, url, snippet }, or nil on failure
-- @treturn ?string error message when the first result is nil
local function ws_ddg(query, n, region, time_range, safesearch)
    local kp = ({off = -2, moderate = -1, strict = 1})[safesearch] or -1
    local df = ({day = "d", week = "w", month = "m", year = "y"})[time_range or ""] or ""
    local url = "https://html.duckduckgo.com/html/?q=" .. ws_url_encode(query)
        .. "&kp=" .. tostring(kp)
    if df ~= "" then url = url .. "&df=" .. df end
    if region and region ~= "" then url = url .. "&kl=" .. ws_url_encode(region) end
    local body_file = tmpname() .. ".body"
    local wfmt = "http_code=%{http_code}\\nexit=%{exitcode}\\n"
    local cmd
    if WINDOWS then
        cmd = string.format(
            'curl -sS --proto =https --max-time 15 -A "lmcp-search/1.0" -o "%s" -w "%s" "%s"',
            body_file, wfmt, url)
        -- run() executes this line from a .bat file, where a single %
        -- starts variable/argument expansion. Double every % so the -w
        -- format and the percent-encoded query reach curl unchanged.
        cmd = cmd:gsub("%%", "%%%%")
    else
        cmd = string.format(
            "curl -sS --proto =https --max-time 15 -A 'lmcp-search/1.0' -o '%s' -w '%s' '%s'",
            body_file, wfmt, url)
    end
    local body, http_code, exit, raw = ws_curl_run(cmd, body_file, 15)
    if exit ~= 0 then
        return nil, ws_curl_err(raw, http_code, exit, "ddg request failed")
    end
    -- Per-result-block iteration (avoids title↔snippet mispairing).
    -- Split on the opening <div class="result results_links"… boundary
    -- rather than on close-tag depth — DDG nests multiple <div>s inside
    -- each block, so a fixed close-tag pattern is fragile.
    local block_pat = '<div class="result results_links[^"]-"[^>]*>'
    local positions = {}
    for s in body:gmatch("()" .. block_pat) do positions[#positions + 1] = s end
    positions[#positions + 1] = #body + 1 -- sentinel end-of-body
    local results = {}
    for i = 1, #positions - 1 do
        local block = body:sub(positions[i], positions[i + 1] - 1)
        local href, title_raw = block:match('<a[^>]-class="result__a"[^>]-href="([^"]+)"[^>]*>(.-)</a>')
        if href and title_raw then
            -- Rows whose link cannot be unwrapped to an http(s) URL are dropped.
            local real_url = ws_ddg_unwrap(href)
            if real_url then
                local snip_raw = block:match('<a[^>]-class="result__snippet"[^>]*>(.-)</a>') or ""
                local title = fetch_html_strip(title_raw):sub(1, 200)
                local snippet = fetch_html_strip(snip_raw):sub(1, 280)
                results[#results + 1] = { title = title, url = real_url, snippet = snippet }
                if #results >= n then break end
            end
        end
    end
    if #results == 0 then
        return nil, "ddg parser matched no results (anti-bot challenge or markup change)"
    end
    return results, nil
end
-- ---- SearXNG (JSON) ----
--- Search via a SearXNG instance named by SEARXNG_URL.
-- @tparam string query
-- @tparam number n maximum number of results
-- @tparam ?string region sent as language= when non-empty
-- @tparam ?string time_range sent as time_range= when non-empty
-- @tparam ?string safesearch off|moderate|strict (anything else: moderate)
-- @treturn ?table list of { title, url, snippet, age }, or nil on failure
-- @treturn ?string error message when the first result is nil
local function ws_searxng(query, n, region, time_range, safesearch)
    local base = os.getenv("SEARXNG_URL")
    if not base or base == "" then return nil, "searxng requires SEARXNG_URL" end
    base = base:gsub("/+$", "")
    local ok, errmsg = ws_safe_envurl(base)
    if not ok then return nil, "SEARXNG_URL: " .. errmsg end
    local ss_map = { off = 0, moderate = 1, strict = 2 }
    local url = base .. "/search?q=" .. ws_url_encode(query)
        .. "&format=json&safesearch=" .. tostring(ss_map[safesearch] or 1)
    if time_range and time_range ~= "" then
        url = url .. "&time_range=" .. ws_url_encode(time_range)
    end
    if region and region ~= "" then
        url = url .. "&language=" .. ws_url_encode(region)
    end
    local body_file = tmpname() .. ".body"
    local wfmt = "http_code=%{http_code}\\nexit=%{exitcode}\\n"
    -- ws_safe_envurl admits http:// as well as https:// base URLs, so curl
    -- must be allowed to speak both; with "=https" alone every plain-http
    -- instance is refused by curl before a request is made.
    local cmd
    if WINDOWS then
        cmd = string.format(
            'curl -sS --proto =http,https --max-time 15 -A "lmcp-search/1.0" -o "%s" -w "%s" "%s"',
            body_file, wfmt, url)
        -- run() executes this line from a .bat file, where a single %
        -- starts variable/argument expansion. Double every % so the -w
        -- format and the percent-encoded query reach curl unchanged.
        cmd = cmd:gsub("%%", "%%%%")
    else
        cmd = string.format(
            "curl -sS --proto =http,https --max-time 15 -A 'lmcp-search/1.0' -o '%s' -w '%s' '%s'",
            body_file, wfmt, url)
    end
    local body, http_code, exit, raw = ws_curl_run(cmd, body_file, 15)
    if exit ~= 0 then
        return nil, ws_curl_err(raw, http_code, exit, "searxng request failed")
    end
    if http_code ~= 200 then
        return nil, string.format("searxng HTTP %d", http_code)
    end
    local pj_ok, d = pcall(require('json').decode, body)
    if not pj_ok or type(d) ~= "table" or type(d.results) ~= "table" then
        return nil, "searxng response is not valid JSON or missing 'results'"
    end
    -- Response fields are untrusted JSON: use a value only when it is a
    -- string, so an unexpected type cannot raise inside :sub().
    local function text(v, max_len)
        if type(v) ~= "string" then return "" end
        return v:sub(1, max_len)
    end
    local out = {}
    for _, r in ipairs(d.results) do
        if type(r) == "table" and type(r.url) == "string" and r.url ~= "" then
            out[#out + 1] = {
                title = text(r.title, 200),
                url = r.url,
                snippet = text(r.content, 280),
                age = r.publishedDate,
            }
            if #out >= n then break end
        end
    end
    if #out == 0 then
        return nil, "searxng returned 0 results"
    end
    return out, nil
end
-- ---- Tavily (JSON POST) ----
--- Search via the Tavily API, authenticated with TAVILY_API_KEY.
-- The request body is written to a scratch file; the key is sent in an
-- Authorization header, not in that file.
-- @tparam string query
-- @tparam number n maximum number of results
-- @treturn ?table list of { title, url, snippet }, or nil on failure
-- @treturn ?string error message when the first result is nil
local function ws_tavily(query, n)
    local key = os.getenv("TAVILY_API_KEY")
    if not key or key == "" then return nil, "tavily requires TAVILY_API_KEY" end
    local ok, errmsg = ws_safe_key(key)
    if not ok then return nil, "TAVILY_API_KEY: " .. errmsg end
    -- %d raises on a fractional number under Lua 5.3+, so force an integer.
    local max_results = math.floor(tonumber(n) or 8)
    local body_in = string.format(
        '{"query":%s,"max_results":%d,"search_depth":"basic","include_answer":false}',
        require('json').encode(query), max_results)
    -- One base for both scratch files; they differ by extension. On
    -- Windows two tmpname() calls in the same second can share a base.
    local base = tmpname()
    local in_file = base .. ".json"
    local out_file = base .. ".body"
    local fw = io.open(in_file, 'wb')
    if not fw then return nil, "could not write tavily request body" end
    fw:write(body_in); fw:close()
    local wfmt = "http_code=%{http_code}\\nexit=%{exitcode}\\n"
    local cmd
    if WINDOWS then
        cmd = string.format(
            'curl -sS --proto =https --max-time 20 -X POST -H "Content-Type: application/json" -H "Authorization: Bearer %s" --data-binary "@%s" -o "%s" -w "%s" "https://api.tavily.com/search"',
            key, in_file, out_file, wfmt)
        -- run() executes this line from a .bat file, where a single %
        -- starts variable/argument expansion. Double every % so the -w
        -- format (and any % in the key) reaches curl unchanged.
        cmd = cmd:gsub("%%", "%%%%")
    else
        cmd = string.format(
            "curl -sS --proto =https --max-time 20 -X POST -H 'Content-Type: application/json' -H 'Authorization: Bearer %s' --data-binary '@%s' -o '%s' -w '%s' 'https://api.tavily.com/search'",
            key, in_file, out_file, wfmt)
    end
    local body, http_code, exit, raw = ws_curl_run(cmd, out_file, 20)
    remove_silent(in_file)
    if exit ~= 0 then
        return nil, ws_curl_err(raw, http_code, exit, "tavily request failed")
    end
    if http_code ~= 200 then
        return nil, string.format("tavily HTTP %d", http_code)
    end
    local pj_ok, d = pcall(require('json').decode, body)
    if not pj_ok or type(d) ~= "table" or type(d.results) ~= "table" then
        return nil, "tavily response is not valid JSON or missing 'results'"
    end
    -- Response fields are untrusted JSON: use a value only when it is a
    -- string, so an unexpected type cannot raise inside :sub().
    local function text(v, max_len)
        if type(v) ~= "string" then return "" end
        return v:sub(1, max_len)
    end
    local out = {}
    for _, r in ipairs(d.results) do
        if type(r) == "table" and type(r.url) == "string" and r.url ~= "" then
            out[#out + 1] = {
                title = text(r.title, 200),
                url = r.url,
                snippet = text(r.content, 280),
            }
            if #out >= n then break end
        end
    end
    if #out == 0 then return nil, "tavily returned 0 results" end
    return out, nil
end
-- ---- Brave Search (JSON GET, header auth) ----
--- Search via the Brave Search API, authenticated with BRAVE_API_KEY.
-- @tparam string query
-- @tparam number n maximum number of results (sent as count=)
-- @tparam ?string region sent as country= when non-empty
-- @tparam ?string safesearch off|moderate|strict (anything else: moderate)
-- @treturn ?table list of { title, url, snippet, age }, or nil on failure
-- @treturn ?string error message when the first result is nil
local function ws_brave(query, n, region, safesearch)
    local key = os.getenv("BRAVE_API_KEY")
    if not key or key == "" then return nil, "brave requires BRAVE_API_KEY" end
    local ok, errmsg = ws_safe_key(key)
    if not ok then return nil, "BRAVE_API_KEY: " .. errmsg end
    -- safesearch is caller-supplied and lands inside the quoted URL on the
    -- command line. Map it through a whitelist instead of concatenating
    -- it raw; a value containing a quote would otherwise break out of the
    -- shell quoting.
    local ss_allowed = { off = "off", moderate = "moderate", strict = "strict" }
    local ss = ss_allowed[safesearch] or "moderate"
    local count = math.floor(tonumber(n) or 8)
    local url = "https://api.search.brave.com/res/v1/web/search?q=" .. ws_url_encode(query)
        .. "&count=" .. tostring(count)
        .. "&safesearch=" .. ss
    if region and region ~= "" then url = url .. "&country=" .. ws_url_encode(region) end
    local body_file = tmpname() .. ".body"
    local wfmt = "http_code=%{http_code}\\nexit=%{exitcode}\\n"
    local cmd
    if WINDOWS then
        cmd = string.format(
            'curl -sS --proto =https --max-time 15 -A "lmcp-search/1.0" -H "Accept: application/json" -H "X-Subscription-Token: %s" -o "%s" -w "%s" "%s"',
            key, body_file, wfmt, url)
        -- run() executes this line from a .bat file, where a single %
        -- starts variable/argument expansion. Double every % so the -w
        -- format and the percent-encoded query reach curl unchanged.
        cmd = cmd:gsub("%%", "%%%%")
    else
        cmd = string.format(
            "curl -sS --proto =https --max-time 15 -A 'lmcp-search/1.0' -H 'Accept: application/json' -H 'X-Subscription-Token: %s' -o '%s' -w '%s' '%s'",
            key, body_file, wfmt, url)
    end
    local body, http_code, exit, raw = ws_curl_run(cmd, body_file, 15)
    if exit ~= 0 then
        return nil, ws_curl_err(raw, http_code, exit, "brave request failed")
    end
    if http_code ~= 200 then
        return nil, string.format("brave HTTP %d", http_code)
    end
    local pj_ok, d = pcall(require('json').decode, body)
    if not pj_ok or type(d) ~= "table" or type(d.web) ~= "table" or type(d.web.results) ~= "table" then
        return nil, "brave response is not valid JSON or missing 'web.results'"
    end
    -- Response fields are untrusted JSON: use a value only when it is a
    -- string, so an unexpected type cannot raise inside :sub().
    local function text(v, max_len)
        if type(v) ~= "string" then return "" end
        return v:sub(1, max_len)
    end
    local out = {}
    for _, r in ipairs(d.web.results) do
        if type(r) == "table" and type(r.url) == "string" and r.url ~= "" then
            out[#out + 1] = {
                title = text(r.title, 200),
                url = r.url,
                snippet = text(r.description, 280),
                age = r.age,
            }
            if #out >= n then break end
        end
    end
    if #out == 0 then return nil, "brave returned 0 results" end
    return out, nil
end
--- Decide which search backend to use.
-- A non-blank LMCP_SEARCH_BACKEND wins (lower-cased and trimmed, returned
-- as-is even if it names no known backend). Otherwise the first of
-- SEARXNG_URL, TAVILY_API_KEY, BRAVE_API_KEY that is set and non-empty
-- selects its backend; with none set the answer is "ddg".
-- @treturn string backend name
local function ws_pick_backend()
    local function env(name)
        return os.getenv(name) or ""
    end
    local explicit = env("LMCP_SEARCH_BACKEND"):lower():match("^%s*(.-)%s*$") or ""
    if explicit ~= "" then
        return explicit
    end
    local auto = {
        { "SEARXNG_URL", "searxng" },
        { "TAVILY_API_KEY", "tavily" },
        { "BRAVE_API_KEY", "brave" },
    }
    for _, candidate in ipairs(auto) do
        if env(candidate[1]) ~= "" then
            return candidate[2]
        end
    end
    return "ddg"
end
-- Tool "web_search": normalise the arguments, dispatch to the selected
-- backend and wrap the outcome in the common envelope
--   { ok, backend, query, results = [{title,url,snippet,age?}], error? }.
server:tool("web_search",
    "Web search returning [{title, url, snippet, age?}]. Backend selected " ..
    "via LMCP_SEARCH_BACKEND env (searxng|tavily|brave|ddg); auto-picks the " ..
    "first configured backend, falling back to ddg (best-effort, often anti-bot blocked).",
    {
        type = "object",
        properties = {
            query = { type = "string", description = "Search query" },
            max_results = { type = "integer", description = "1..25", default = 8 },
            region = { type = "string", description = "Backend-specific locale (e.g. 'de-de')", default = "" },
            time_range = { type = "string", description = "'' | day | week | month | year", default = "" },
            safesearch = { type = "string", description = "off | moderate | strict", default = "moderate" },
        },
        required = { "query" },
    },
    function(a)
        -- Use an argument only when it is a string. The backends call
        -- string methods on these values, which raise on other types.
        local function str_arg(v, default)
            if type(v) == "string" then return v end
            return default
        end
        local query = str_arg(a.query, ""):match("^%s*(.-)%s*$") or ""
        if query == "" then
            return { ok = false, backend = "", query = "", results = {}, error = "query required" }
        end
        -- Whole number in 1..25. A fractional value would later raise in
        -- string.format("%d", …) under Lua 5.3+.
        local n = math.floor(tonumber(a.max_results) or 8)
        if n < 1 then n = 1 elseif n > 25 then n = 25 end
        local backend = ws_pick_backend()
        local region = str_arg(a.region, "")
        local time_range = str_arg(a.time_range, "")
        -- Only the three documented values are forwarded; anything else
        -- becomes "moderate", which is also what the ddg and searxng
        -- lookups fall back to for unknown values.
        local safesearch = str_arg(a.safesearch, "moderate")
        if safesearch ~= "off" and safesearch ~= "strict" then
            safesearch = "moderate"
        end
        local results, err
        if backend == "ddg" then
            results, err = ws_ddg(query, n, region, time_range, safesearch)
        elseif backend == "searxng" then
            results, err = ws_searxng(query, n, region, time_range, safesearch)
        elseif backend == "tavily" then
            results, err = ws_tavily(query, n)
        elseif backend == "brave" then
            results, err = ws_brave(query, n, region, safesearch)
        else
            return { ok = false, backend = backend, query = query, results = {},
                     error = "unknown backend: " .. backend }
        end
        if err then
            return { ok = false, backend = backend, query = query, results = {}, error = err }
        end
        return { ok = true, backend = backend, query = query, results = results }
    end, {
        annotations = {
            title = "Web search",
            readOnlyHint = true,
            destructiveHint = false,
            idempotentHint = true,
            openWorldHint = true,
        },
    })
-- Windows-only tool: exposes the output of the `systeminfo` command.
-- Takes no arguments; the command is run through `run` with a limit of 30.
if WINDOWS then
  local no_args_schema = { type = "object", properties = {} }

  local hints = {
    title = "Windows system info",
    readOnlyHint = true,
    destructiveHint = false,
    idempotentHint = true,
    openWorldHint = false,
  }

  local function systeminfo_handler()
    return run("systeminfo", 30)
  end

  server:tool("systeminfo", "Get Windows system information.",
    no_args_schema, systeminfo_handler, { annotations = hints })
end
-- ---- host-local tool plugins (issue #22) ----
-- Load every .lua file in LMCP_TOOLS_DIR (default /opt/lmcp/tools.d on POSIX,
-- %ProgramData%\lmcp\tools.d on Windows). An empty LMCP_TOOLS_DIR is treated
-- as unset. Each file is compiled with loadfile and called with the
-- configured `server` instance and the `run` helper as its arguments:
--
--   local server, run = ...
--   server:tool("my_local_tool", "...", {...}, function(a) return run(...) end)
--
-- This is the standard drop-in directory pattern (nginx conf.d/, systemd
-- tmpfiles.d/, ...). Hosts can ship their own tools alongside the packaged
-- generics without forking the upstream server.lua.
--
-- A plugin that fails to compile or raises while running is reported on
-- stderr and skipped; it never prevents the server from starting.
local plugin_dir = os.getenv("LMCP_TOOLS_DIR")
if plugin_dir == nil or plugin_dir == "" then
  -- "" is truthy in Lua; without this check an empty variable would make
  -- the POSIX listing below scan "/*.lua".
  if WINDOWS then
    plugin_dir = (os.getenv("ProgramData") or "C:\\ProgramData") .. "\\lmcp\\tools.d"
  else
    plugin_dir = "/opt/lmcp/tools.d"
  end
end

local list_cmd
if WINDOWS then
  list_cmd = 'dir /b "' .. plugin_dir .. '\\*.lua" 2>nul'
else
  -- Single-quote the directory so the shell expands nothing inside it
  -- (embedded ' becomes '\''); the glob stays outside the quotes.
  -- -d lists a directory that happens to match *.lua as itself instead of
  -- expanding its contents; -- stops option parsing for names starting "-".
  list_cmd = "ls -1d -- '" .. (plugin_dir:gsub("'", "'\\''")) .. "'/*.lua 2>/dev/null"
end

local lh = io.popen(list_cmd)
if lh then
  for line in lh:lines() do
    -- Drop a stray carriage return in case the pipe was not CRLF-translated.
    local path = (line:gsub("\r$", ""))
    -- Windows wildcards also match 8.3 short names (e.g. "foo.luabak"), so
    -- re-check the suffix. Case-insensitive, to keep accepting "FOO.LUA".
    if path:lower():match("%.lua$") then
      -- On Windows `dir /b` emits bare filenames; prefix the dir.
      local full = path
      if not path:match("[/\\]") then
        full = plugin_dir .. (WINDOWS and "\\" or "/") .. path
      end
      local chunk, err = loadfile(full)
      if chunk then
        local ok, perr = pcall(chunk, server, run)
        if ok then
          io.stderr:write("lmcp: loaded plugin " .. full .. "\n")
        else
          io.stderr:write("lmcp: plugin " .. full .. " errored: "
            .. tostring(perr) .. "\n")
        end
      else
        io.stderr:write("lmcp: plugin " .. full .. " load error: "
          .. tostring(err) .. "\n")
      end
    end
  end
  lh:close()
end
-- ---- Start serving ----
-- LMCP_TRANSPORT selects the transport: exactly "stdio" runs the stdio loop;
-- any other value (or unset, which defaults to "http") runs the HTTP server.
local transport = os.getenv("LMCP_TRANSPORT") or "http"
if transport ~= "stdio" then
  local platform = WINDOWS and "Windows" or "POSIX"
  local banner = string.format("lmcp %s starting on port %d (%s)\n",
    server_name, server.port, platform)
  io.stderr:write(banner)
  server:run()
else
  -- The port setting has no effect on stdio; tell the operator it was set.
  if os.getenv("LMCP_PORT") then
    io.stderr:write("lmcp: LMCP_PORT ignored in stdio mode\n")
  end
  server:run_stdio()
end