v1.1.0/#20: concurrent handler dispatch
Replaces the synchronous tools/call path with a coroutine-wrapped
dispatch. The select()-based event loop from v1.0.0-rc1 already
multiplexes I/O; this change extends the same single-thread
cooperative scheduling to tool handler execution.
How:
- server.lua:sleep_ms detects coroutine context and yields with
{ wake_at = gettime() + ms/1000 } instead of blocking. Falls back
to today's busy-blocking sleep when on the main thread (stdio
dispatch, init code).
- server.lua:run() now uses gettime() deltas for timeout accounting
(Phase 5 review fix — the prior interval accumulator diverged
from wall-clock time whenever the scheduler delayed a resume).
- lmcp.lua wraps the handle_request call inside _dispatch_post in a
coroutine. Synchronous completion (no yield) takes the inline-
response path; if the handler yields, the coroutine parks in
self._pending_handlers and the conn enters dispatching_async.
- New _scheduler_tick services pending coroutines whose wake_at has
passed; on completion calls the shared _finalise_dispatch helper
to build the deferred HTTP response (Accept-aware: SSE or JSON).
- select() timeout tightens to the next pending wake_at so short
yields don't pay the full 100ms tick.
Measurement (Phase 7):
before: fast ping during slow shell sleep 3 = 4.28s
after: fast ping during slow shell sleep 3 = 0.01s (~400×)
3 parallel slow shells: 3.77s total wall (was ~9s).
Zero handler source-code changes. Every existing tool that goes
through run() (shell, shell_bg, fetch, web_search, list_dir,
search_files, systeminfo, hub remote_*) gets concurrency for free.
Pure-Lua handlers (ping, read_file, write_file, edit_file) continue
to complete inline. stdio transport stays serialised by design
(single-client per stdio process).
Known limits documented in memory project_handler_coroutines:
- socket.gettime() is wall-clock, not monotonic; large NTP steps may
bunch resumes. Acceptable on a chrony-slewed fleet.
- Cancellation (#11) is now tractable since the scheduler can flip a
flag between resumes — implementation pending.
- Server-initiated request await (sampling/roots from inside a
handler) still requires a future yield-on-pending helper.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+48
-21
@@ -35,7 +35,35 @@ local function tmpname()
|
||||
end
|
||||
end
|
||||
|
||||
-- luasocket is loaded on demand: gettime() is the only consumer here, so
-- the module is not required when this file is loaded. The handle is
-- cached in `_socket` after the first call.
local _socket = nil

--- Return the current time as reported by luasocket's socket.gettime().
-- The first call requires "socket" and caches the module; later calls
-- reuse the cached handle.
-- @treturn number the value returned by socket.gettime()
local function gettime()
  _socket = _socket or require("socket")
  return _socket.gettime()
end
|
||||
|
||||
--- Report whether the caller is executing inside a non-main coroutine.
-- coroutine.running() has two shapes that must both be handled:
--   * Lua 5.4 returns (co, is_main), also on the main thread;
--   * Lua 5.1 / LuaJIT returns nil when called from the main thread.
-- Any non-main coroutine yields true; the check does not distinguish
-- lmcp dispatch coroutines (issue #20) from other coroutines.
-- @treturn boolean true only when running in a non-main coroutine
local function in_coroutine()
  local thread, on_main = coroutine.running()
  return thread ~= nil and not on_main
end
|
||||
|
||||
local function sleep_ms(ms)
|
||||
-- Coroutine-aware: yield with a wake deadline instead of busy-blocking.
|
||||
-- The lmcp event loop services I/O for other connections while this
|
||||
-- coroutine sleeps, then resumes it once the deadline elapses.
|
||||
-- (Issue #20: gives concurrent tool dispatch without changing handler
|
||||
-- source code — tools that go through run() get it for free.)
|
||||
if in_coroutine() then
|
||||
coroutine.yield({ wake_at = gettime() + (ms / 1000) })
|
||||
return
|
||||
end
|
||||
if WINDOWS then
|
||||
-- ping loopback: ~1s per -n count. For sub-second, use busy-wait.
|
||||
if ms < 500 then
|
||||
@@ -78,6 +106,22 @@ local function run(cmd, timeout_sec)
|
||||
local out_file = base .. ".out"
|
||||
local done_file = base .. ".done"
|
||||
|
||||
-- Wall-clock deadline rather than an accumulated interval-counter:
|
||||
-- when we're inside a dispatch coroutine (issue #20), the scheduler
|
||||
-- may delay our resume by more than `interval`, so an accumulator
|
||||
-- diverges from real elapsed. gettime() comparison stays honest in
|
||||
-- both busy-poll and yield-resume modes.
|
||||
local started = gettime()
|
||||
--- Poll for the completion sentinel until the wall-clock deadline.
-- Reads `started`, `timeout_sec` and `done_file` from the enclosing
-- scope. The sentinel is checked before every deadline test, so a
-- command that finishes during the last sleep is still reported as
-- completed rather than as a timeout.
-- @treturn boolean true if `done_file` was observed, false on timeout
local function poll_loop()
  local interval = WINDOWS and 100 or 50 -- ms
  while true do
    if file_exists(done_file) then return true end

    local remaining_ms = (timeout_sec - (gettime() - started)) * 1000
    if remaining_ms <= 0 then return false end

    -- Never sleep past the deadline: the back-off interval can grow to
    -- several seconds, which would otherwise overshoot timeout_sec.
    -- Keep the value a whole number of ms, and at least 1.
    local nap = math.min(interval, math.ceil(remaining_ms))
    sleep_ms(math.max(1, nap))

    -- Back-off growth is unchanged from the original.
    if interval < 2000 then interval = math.floor(interval * 1.5) end
  end
end
|
||||
|
||||
if WINDOWS then
|
||||
-- Write a batch wrapper that runs the command and signals completion
|
||||
local bat_file = base .. ".bat"
|
||||
@@ -89,22 +133,13 @@ local function run(cmd, timeout_sec)
|
||||
bf:close()
|
||||
os.execute('start /B cmd /C "' .. bat_file .. '"')
|
||||
|
||||
-- Poll for sentinel
|
||||
local elapsed = 0
|
||||
local interval = 100 -- ms
|
||||
while elapsed < timeout_sec * 1000 do
|
||||
if file_exists(done_file) then break end
|
||||
sleep_ms(interval)
|
||||
elapsed = elapsed + interval
|
||||
if interval < 2000 then interval = math.floor(interval * 1.5) end
|
||||
end
|
||||
|
||||
local completed = poll_loop()
|
||||
local output = read_file(out_file)
|
||||
remove_silent(bat_file)
|
||||
remove_silent(out_file)
|
||||
remove_silent(done_file)
|
||||
|
||||
if elapsed >= timeout_sec * 1000 then
|
||||
if not completed then
|
||||
return output or ("Error: command timed out after " .. timeout_sec .. "s")
|
||||
end
|
||||
return output and output ~= "" and output or "(no output)"
|
||||
@@ -117,20 +152,12 @@ local function run(cmd, timeout_sec)
|
||||
)
|
||||
os.execute("sh -c '" .. sh_cmd:gsub("'", "'\\''") .. "' &")
|
||||
|
||||
local elapsed = 0
|
||||
local interval = 50 -- ms
|
||||
while elapsed < timeout_sec * 1000 do
|
||||
if file_exists(done_file) then break end
|
||||
sleep_ms(interval)
|
||||
elapsed = elapsed + interval
|
||||
if interval < 2000 then interval = math.floor(interval * 1.5) end
|
||||
end
|
||||
|
||||
local completed = poll_loop()
|
||||
local output = read_file(out_file)
|
||||
remove_silent(out_file)
|
||||
remove_silent(done_file)
|
||||
|
||||
if elapsed >= timeout_sec * 1000 then
|
||||
if not completed then
|
||||
return output or ("Error: command timed out after " .. timeout_sec .. "s")
|
||||
end
|
||||
return output and output ~= "" and output or "(no output)"
|
||||
|
||||
Reference in New Issue
Block a user