From e4b818b0e976ad579aca7346adf87c60d4cd8ceb Mon Sep 17 00:00:00 2001 From: Markus Fritsche Date: Sat, 16 May 2026 21:36:39 +0000 Subject: [PATCH] secrets: vault loader + scrub/rehydrate + autodetect (#13 commit 1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Standalone module — no wiring yet. Lands the substrate for issue #13: secrets.load(path) — vault file loader; refuses non-0600 secrets.make_session(vault) — per-conversation scrub/rehydrate state session:scrub(text, mode) — substitute literals (+ autodetect) session:rehydrate(text) — restore placeholders secrets.streaming_rehydrator — chunk-boundary-tolerant streaming wrapper Mode semantics (chosen per call by the caller): "off" — identity, no mapping "vault" — vault literals only, placeholders, rehydratable "vault+autodetect" — + heuristic regexes, placeholders, rehydratable "stealth" — + heuristic regexes, opaque decoys, one-way Placeholders are stable across the session: the same literal always maps to the same $AISH_SECRET_NNN slot, so re-scrubbing the same context is idempotent and the model sees a consistent vocabulary. AUTODETECT_PATTERNS (ordered; longer prefixes first): sk-or-v-... OpenRouter ghp_/gho_/ghs_ GitHub PATs AKIA<16> AWS access keys eyJ...x.y.z JWTs sk-... OpenAI (generic; matched after openrouter) -----BEGIN ... PRIVATE KEY----- SSH/GPG key headers Streaming rehydrator: tolerates a placeholder split across SSE chunks ($AISH_SE then CRET_001). It holds back the trailing partial-match in a buffer, emits the rest, and resolves on the next push or flush. Verified with 20 unit cases (vault sub, stable mapping, autodetect across all label kinds, stealth decoys, mode=off, streaming with mid-placeholder splits, non-placeholder $-prose pass-through). Vault file mode enforcement: 0600 only — matches ssh's behavior for ~/.ssh/id_rsa. Loud failure (status + skip) if mode is wider. 
Next commit (issue #13 follow-up): wire into broker / tool dispatch / display, add per-broker `redact` policy, :secrets meta, config example block. Co-Authored-By: Claude Opus 4.7 (1M context) --- secrets.lua | 250 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 250 insertions(+) create mode 100644 secrets.lua diff --git a/secrets.lua b/secrets.lua new file mode 100644 index 0000000..8852955 --- /dev/null +++ b/secrets.lua @@ -0,0 +1,250 @@ +-- secrets.lua — vault + scrub/rehydrate for issue #13. +-- +-- Pipeline: +-- 1. M.load(path) reads the user's vault. Refuses to load if the file +-- isn't mode 0600 (matches ssh's behavior for ~/.ssh/id_rsa). +-- 2. M.make_session(vault, opts) returns a per-conversation state object. +-- session:scrub(text, mode) substitutes secrets with stable placeholders +-- ($AISH_SECRET_001, _002, ...) and records the mapping. session:rehydrate +-- reverses it. The mapping is stable across the conversation, so the same +-- literal value always maps to the same placeholder slot. +-- 3. M.streaming_rehydrator(session) wraps the per-delta rehydration so a +-- placeholder split across SSE chunks doesn't render half-substituted. +-- +-- Modes (per call to session:scrub): +-- "off" → identity (returns text unchanged, no mapping) +-- "vault" → vault literals only, placeholders, rehydratable +-- "vault+autodetect" → + heuristic regexes, placeholders, rehydratable +-- "stealth" → + heuristic regexes, opaque decoys, NOT rehydratable +-- (one-way scrub for zero-info brokers — user and +-- model both see decoys; real values only in the +-- executor stream which is pre-scrub) + +local M = {} + +-- ---------------------------------------------------------------- AUTODETECT_PATTERNS +-- Order matters: longer / more-specific prefixes must come first so a generic +-- "sk-..." rule doesn't shadow "sk-or-v1-..." which IS the actual key. Each +-- entry is { pat = "", min_len = N (optional), max_len = N (opt), +-- label = "" }. 
-- Lua patterns don't support {N} repeats; we build fixed widths with
-- string.rep and use a post-match length check for variable ones.
local SIXTEEN_UPPER = ("[A-Z0-9]"):rep(16)

M.AUTODETECT_PATTERNS = {
    -- OpenRouter (long form). v%d+ catches v1, v2, ...
    { pat = "sk%-or%-v%d+%-[%w_-]+", min_len = 20, label = "openrouter" },
    -- GitHub tokens (ghp_ personal, gho_ OAuth, ghs_ server-to-server).
    -- min_len applies to the whole match, prefix included.
    { pat = "ghp_[%w]+", min_len = 36, label = "ghp" },
    { pat = "gho_[%w]+", min_len = 36, label = "gho" },
    { pat = "ghs_[%w]+", min_len = 36, label = "ghs" },
    -- AWS access keys: exactly AKIA + 16 chars [A-Z0-9].
    { pat = "AKIA" .. SIXTEEN_UPPER, label = "aws-key" },
    -- JWT: 3 base64url segments separated by dots; require eyJ prefix
    -- (decodes to `{"`) so we don't match arbitrary dotted slugs.
    { pat = "eyJ[%w_-]+%.[%w_-]+%.[%w_-]+", min_len = 30, label = "jwt" },
    -- Generic "sk-" keys (must come AFTER sk-or-* so the more specific rule
    -- claims OpenRouter keys first). The body allows '-' and '_' because
    -- project/service keys ("sk-proj-...") embed them; with a bare %w class
    -- the match stopped at the first '-' and never reached min_len. The
    -- %f[%w] frontier requires "sk" to start a word, so hyphenated prose
    -- such as "risk-..." or "task-..." is not mistaken for a key.
    { pat = "%f[%w]sk%-[%w_%-]+", min_len = 20, label = "openai" },
    -- SSH/GPG private key block (multi-line; match header only, the caller
    -- can extend matching to include the body if needed). Greedy across
    -- newlines isn't easy in Lua patterns — we match just the header line
    -- and let policy decide to redact the whole file.
    { pat = "%-%-%-%-%-BEGIN[^\n]-PRIVATE KEY%-%-%-%-%-",
      label = "private-key-hdr" },
}

-- ---------------------------------------------------------------- load(path)
-- Returns (vault, err). vault is { entries = {{name=, value=}, ...} }.
-- entries may be {name=, value=} tables or bare strings (per the issue body).
-- Bare strings get a synthesized name of the form LITERAL_<list index>;
-- tables without a name get ENTRY_<list index>.
function M.load(path)
    if type(path) ~= "string" or path == "" then
        return nil, "secrets: vault path must be a non-empty string"
    end

    -- Existence/readability probe, so a missing file gets a clearer message
    -- than a failed stat.
    local f = io.open(path, "r")
    if not f then
        return nil, ("secrets: %s: not found"):format(path)
    end
    f:close()

    -- Mode check: refuse to load if not 0600.
    -- The path is wrapped in single quotes for the shell. Lua's %q produces
    -- Lua-style double quotes, inside which the shell still expands $(...)
    -- and backticks, so it is not a safe way to build a command line.
    -- "--" keeps a path that starts with "-" from being parsed as an option.
    local quoted = "'" .. (path:gsub("'", "'\\''")) .. "'"
    local stat_commands = {
        "stat -c %a -- " .. quoted .. " 2>/dev/null",  -- GNU coreutils
        "stat -f %Lp -- " .. quoted .. " 2>/dev/null", -- BSD / macOS
    }
    local mode
    for _, cmd in ipairs(stat_commands) do
        -- io.popen raises on platforms that lack it; treat that as "cannot stat".
        local ok, sh = pcall(io.popen, cmd)
        if ok and sh then
            local line = sh:read("*l")
            sh:close()
            -- Accept only an all-digit answer; anything else (e.g. output of
            -- the wrong stat dialect) falls through to the next command.
            if line and line:match("^%d+$") then
                mode = line
                break
            end
        end
    end
    if not mode then
        return nil, ("secrets: %s: cannot stat"):format(path)
    end
    if mode ~= "600" then
        return nil, ("secrets: %s: refusing to load (mode %s, want 600 — chmod 600)"):format(path, mode)
    end

    -- NOTE(review): dofile executes the vault as Lua code with full
    -- privileges. The 0600 check above is the only gate, and the file could
    -- be swapped between the stat and this call.
    local ok, payload = pcall(dofile, path)
    if not ok then
        return nil, ("secrets: %s: load failed: %s"):format(path, tostring(payload))
    end
    if type(payload) ~= "table" then
        return nil, ("secrets: %s: must return a list, got %s"):format(path, type(payload))
    end

    -- Normalize every usable entry to { name = <string>, value = <string> }.
    -- Entries of any other shape are skipped.
    local entries = {}
    for i, e in ipairs(payload) do
        if type(e) == "string" then
            entries[#entries + 1] = {
                name = ("LITERAL_%d"):format(i),
                value = e,
            }
        elseif type(e) == "table" and type(e.value) == "string" then
            local name = e.name
            if type(name) ~= "string" then
                -- Names are later formatted with %s, so keep them strings.
                name = name and tostring(name) or ("ENTRY_%d"):format(i)
            end
            entries[#entries + 1] = { name = name, value = e.value }
        end
    end
    return { entries = entries }
end

-- ---------------------------------------------------------------- session
-- Method table for the per-conversation state objects built by make_session.
local Session = {}
Session.__index = Session

-- Build a fresh session.
--   vault : table returned by M.load, or nil for a session without literals
--   opts  : optional table; opts.autodetect_patterns replaces
--           M.AUTODETECT_PATTERNS for this session only
function M.make_session(vault, opts)
    opts = opts or {}
    return setmetatable({
        entries = (vault and vault.entries) or {},
        mapping_by_value = {},       -- [value] -> token handed out for it
        mapping_by_placeholder = {}, -- [placeholder] -> value (for rehydrate)
        counter = 0,                 -- number of tokens allocated so far
        autodetect_patterns = opts.autodetect_patterns or M.AUTODETECT_PATTERNS,
    }, Session)
end

-- True when match `s` satisfies the optional min_len / max_len bounds of
-- pattern entry `p` (a missing bound means "no limit").
local function _meets_length(s, p)
    if p.min_len and #s < p.min_len then return false end
    if p.max_len and #s > p.max_len then return false end
    return true
end

-- Allocate a placeholder for `value`, stable across calls in this session.
-- For "stealth" mode we use opaque decoys; non-stealth uses $AISH_SECRET_NNN.
--   value   : the secret text being replaced
--   stealth : true → decoy (never rehydratable), false → placeholder
--   label   : short kind/name embedded in decoys (defaults to "secret")
-- Decoys and placeholders are cached separately. The mode is chosen per
-- scrub call, so one session can see both; with a shared cache the first
-- mode used for a value would decide what every later mode gets back.
function Session:_placeholder_for(value, stealth, label)
    local cache
    if stealth then
        -- Created on first use; sessions start with only mapping_by_value.
        self.decoy_by_value = self.decoy_by_value or {}
        cache = self.decoy_by_value
    else
        cache = self.mapping_by_value
    end

    local existing = cache[value]
    if existing then return existing end

    self.counter = self.counter + 1
    local token
    if stealth then
        -- Opaque decoy keyed off the label (so distinct kinds look distinct
        -- to anyone reading along, without revealing the actual value).
        token = ("xxxxxx-fake-%s-%03d-xxxxxx"):format(label or "secret", self.counter)
    else
        token = ("$AISH_SECRET_%03d"):format(self.counter)
        -- Only non-stealth placeholders go into the rehydration map.
        self.mapping_by_placeholder[token] = value
    end
    cache[value] = token
    return token
end

-- Replace every plain-text (non-pattern) occurrence of `needle` in `text`.
-- `replacement_for` is called once, and only if there is at least one
-- occurrence, so no slot is allocated for secrets that never appear.
local function _replace_plain(text, needle, replacement_for)
    local s, e = text:find(needle, 1, true)
    if not s then return text end
    local replacement = replacement_for()
    local parts, cursor = {}, 1
    while s do
        parts[#parts + 1] = text:sub(cursor, s - 1)
        parts[#parts + 1] = replacement
        cursor = e + 1
        s, e = text:find(needle, cursor, true)
    end
    parts[#parts + 1] = text:sub(cursor)
    return table.concat(parts)
end

-- Substitute all vault literals + (in autodetect/stealth modes) all
-- AUTODETECT_PATTERNS matches. Returns the scrubbed string.
--   text : string to scrub; nil or "" yields ""
--   mode : "off" | "vault" (default) | "vault+autodetect" | "stealth";
--          any other value behaves like "vault"
function Session:scrub(text, mode)
    if not text or text == "" then return text or "" end
    mode = mode or "vault"
    if mode == "off" then return text end
    local stealth = (mode == "stealth")
    local use_autodetect = (mode == "vault+autodetect" or mode == "stealth")

    -- Vault literals first (deterministic by user's list order).
    -- Plain-text search so vault values aren't interpreted as Lua patterns.
    -- NOTE(review): entries are applied one after another to the evolving
    -- text, so a value that is a substring of a later, longer value is
    -- replaced first and the longer value no longer matches as a whole.
    for _, entry in ipairs(self.entries) do
        local value = entry.value
        if type(value) == "string" and value ~= "" then
            text = _replace_plain(text, value, function()
                return self:_placeholder_for(value, stealth, entry.name)
            end)
        end
    end

    -- Autodetect heuristics (Lua patterns). Order matters per AUTODETECT_PATTERNS.
    if use_autodetect then
        for _, p in ipairs(self.autodetect_patterns) do
            -- Parentheses drop gsub's second result (the match count).
            text = (text:gsub(p.pat, function(m)
                if _meets_length(m, p) then
                    return self:_placeholder_for(m, stealth, p.label)
                end
                return m
            end))
        end
    end

    return text
end

-- Fixed text every rehydratable placeholder starts with; the slot number
-- (at least three digits) follows it.
local PLACEHOLDER_PREFIX = "$AISH_SECRET_"

-- Reverse the placeholder substitution. Only the placeholder itself is
-- consumed, so trailing punctuation and surrounding quotes/backticks are
-- left untouched (gotcha 1 in the issue body). Unknown placeholders are
-- returned unchanged.
function Session:rehydrate(text)
    if not text or text == "" then return text or "" end
    local known = self.mapping_by_placeholder
    return (text:gsub("%$AISH_SECRET_(%d%d%d+)", function(digits)
        -- %03d pads but does not truncate, so slots past 999 are wider than
        -- three digits. Try the longest digit run first and fall back to
        -- shorter ones; digits that are not part of the slot number are
        -- ordinary text that followed the placeholder and are re-appended.
        for len = #digits, 3, -1 do
            local value = known[PLACEHOLDER_PREFIX .. digits:sub(1, len)]
            if value then
                return value .. digits:sub(len + 1)
            end
        end
        return nil -- no mapping: gsub keeps the original text
    end))
end

-- Introspection helpers for the :secrets meta.
-- mapping_size: number of tokens allocated so far (placeholders and decoys).
function Session:mapping_size() return self.counter end
-- has_vault: true when the session was built with at least one vault entry.
function Session:has_vault() return #self.entries > 0 end
-- vault_names: list of entry names in vault order (never the values).
function Session:vault_names() 
    local names = {}
    for i, entry in ipairs(self.entries) do names[i] = entry.name end
    return names
end

-- ---------------------------------------------------------------- streaming rehydrator
-- Streamed assistant deltas may split a placeholder across chunks
-- ($AISH_SE then CRET_001). Buffer just enough to recognize an
-- incomplete placeholder match at the tail; emit everything before
-- the last `$` that could be the start of a partial placeholder.
local Stream = {}
Stream.__index = Stream

-- Wrap `session` in a stream object with an empty hold-back buffer.
function M.streaming_rehydrator(session)
    return setmetatable({ session = session, tail = "" }, Stream)
end

-- True when `s` (which starts at a `$`) could still grow into a placeholder:
-- a prefix of "$AISH_SECRET_", or that prefix plus fewer than three digits.
-- With `wide` set (slots past 999 exist) any all-digit suffix may still be
-- extended by the next chunk, so it counts as partial too.
local function _is_partial_placeholder(s, wide)
    local n = #PLACEHOLDER_PREFIX
    if #s <= n then
        return PLACEHOLDER_PREFIX:sub(1, #s) == s
    end
    if s:sub(1, n) ~= PLACEHOLDER_PREFIX then return false end
    local digits = s:sub(n + 1)
    if not digits:match("^%d+$") then return false end
    return wide or #digits < 3
end

-- Feed one streamed chunk; returns the text that is safe to display now.
-- Anything held back is returned by a later push() or by flush().
function Stream:push(chunk)
    local combined = self.tail .. (chunk or "")
    local head, held = combined, ""

    -- Split off the candidate tail BEFORE rehydrating, so a `$` that is part
    -- of a restored secret value is never mistaken for a placeholder start
    -- and already-restored text is never rehydrated a second time.
    local last_dollar = combined:find("%$[^%$]*$")
    if last_dollar then
        local candidate = combined:sub(last_dollar)
        local wide = (self.session.counter or 0) >= 1000
        if _is_partial_placeholder(candidate, wide) then
            head = combined:sub(1, last_dollar - 1)
            held = candidate
        end
    end

    self.tail = held
    return self.session:rehydrate(head)
end

-- End of stream: return whatever is still held back.
function Stream:flush()
    local rest = self.tail
    self.tail = ""
    -- One last rehydrate pass — the tail might contain a complete
    -- placeholder we held only because there was no chunk after it.
    return self.session:rehydrate(rest)
end

return M