From d72689f709166a8a5425e3d448d609d02a72fb2b Mon Sep 17 00:00:00 2001 From: Markus Fritsche Date: Wed, 13 May 2026 11:42:23 +0000 Subject: [PATCH] =?UTF-8?q?config:=20deep=20model=20=E2=86=92=20deepseek-c?= =?UTF-8?q?oder-v2-lite=20(temporary)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit qwen3-30b-a3b-instruct isn't loaded on hossenfelder right now (per /v1/models). deepseek-coder-v2-lite IS loaded — 16B MoE with ~2.4B active params; fast enough that the 30-min timeout from the qwen3-30b config was wildly over-budget. Switched to deepseek-coder-v2-lite for the time being. Restore qwen3-30b when the slot is back up. Live-probed: YES/NO destructive probe via the deep model preset returns "YES." in ~4.8s — well within the new 5-min timeout, and fast enough that the Phase 3 LLM second-opinion path is now functional again without falling back to "fail-safe YES" on every ambiguous command. Co-Authored-By: Claude Opus 4.7 (1M context) --- config.lua | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/config.lua b/config.lua index 0a5bb05..b46ff45 100644 --- a/config.lua +++ b/config.lua @@ -20,8 +20,11 @@ return { }, deep = { endpoint = HOSSENFELDER, - model = "qwen3-30b-a3b-instruct", - timeout_ms = 1800000, -- 10 min; Nemo on RK3588 is patient work + -- 2026-05-13: qwen3-30b not loaded on hossenfelder right now; + -- using deepseek-coder-v2-lite (16B MoE, ~2.4B active) for the + -- time being. Restore qwen3-30b when the slot is back up. + model = "deepseek-coder-v2-lite", + timeout_ms = 300000, -- 5 min; live probe answered in ~4.8s, so this is ample headroom temperature = 0.1, }, cloud = {