secrets: vault loader + scrub/rehydrate + autodetect (#13 commit 1)
Standalone module — no wiring yet. Lands the substrate for issue #13:

  secrets.load(path)            — vault file loader; refuses non-0600
  secrets.make_session(vault)   — per-conversation scrub/rehydrate state
  session:scrub(text, mode)     — substitute literals (+ autodetect)
  session:rehydrate(text)       — restore placeholders
  secrets.streaming_rehydrator  — chunk-boundary-tolerant streaming wrapper

Mode semantics (chosen per call by the caller):

  "off"              — identity, no mapping
  "vault"            — vault literals only, placeholders, rehydratable
  "vault+autodetect" — + heuristic regexes, placeholders, rehydratable
  "stealth"          — + heuristic regexes, opaque decoys, one-way

Placeholders are stable across the session: the same literal always maps to the same $AISH_SECRET_NNN slot, so re-scrubbing the same context is idempotent and the model sees a consistent vocabulary.

AUTODETECT_PATTERNS (ordered; longer prefixes first):

  sk-or-v<N>-...                   OpenRouter
  ghp_/gho_/ghs_                   GitHub PATs
  AKIA<16>                         AWS access keys
  eyJ...x.y.z                      JWTs
  sk-...                           OpenAI (generic; matched after openrouter)
  -----BEGIN ... PRIVATE KEY-----  SSH/GPG key headers

Streaming rehydrator: tolerates a placeholder split across SSE chunks ($AISH_SE then CRET_001). It holds back the trailing partial-match in a buffer, emits the rest, and resolves on the next push or flush.

Verified with 20 unit cases (vault sub, stable mapping, autodetect across all label kinds, stealth decoys, mode=off, streaming with mid-placeholder splits, non-placeholder $-prose pass-through).

Vault file mode enforcement: 0600 only — matches ssh's behavior for ~/.ssh/id_rsa. Loud failure (status + skip) if mode is wider.

Next commit (issue #13 follow-up): wire into broker / tool dispatch / display, add per-broker `redact` policy, :secrets meta, config example block.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+250
@@ -0,0 +1,250 @@
|
||||
-- secrets.lua — vault + scrub/rehydrate for issue #13.
|
||||
--
|
||||
-- Pipeline:
|
||||
-- 1. M.load(path) reads the user's vault. Refuses to load if the file
|
||||
-- isn't mode 0600 (matches ssh's behavior for ~/.ssh/id_rsa).
|
||||
-- 2. M.make_session(vault, opts) returns a per-conversation state object.
|
||||
-- session:scrub(text, mode) substitutes secrets with stable placeholders
|
||||
-- ($AISH_SECRET_001, _002, ...) and records the mapping. session:rehydrate
|
||||
-- reverses it. The mapping is stable across the conversation, so the same
|
||||
-- literal value always maps to the same placeholder slot.
|
||||
-- 3. M.streaming_rehydrator(session) wraps the per-delta rehydration so a
|
||||
-- placeholder split across SSE chunks doesn't render half-substituted.
|
||||
--
|
||||
-- Modes (per call to session:scrub):
|
||||
-- "off" → identity (returns text unchanged, no mapping)
|
||||
-- "vault" → vault literals only, placeholders, rehydratable
|
||||
-- "vault+autodetect" → + heuristic regexes, placeholders, rehydratable
|
||||
-- "stealth" → + heuristic regexes, opaque decoys, NOT rehydratable
|
||||
-- (one-way scrub for zero-info brokers — user and
|
||||
-- model both see decoys; real values only in the
|
||||
-- executor stream which is pre-scrub)
|
||||
|
||||
-- Module table: the public entry points and AUTODETECT_PATTERNS defined in
-- this file are attached to it, and it is what the file returns.
local M = {}
|
||||
|
||||
-- ---------------------------------------------------------------- AUTODETECT_PATTERNS
|
||||
-- Order matters: longer / more-specific prefixes must come first so a generic
|
||||
-- "sk-..." rule doesn't shadow "sk-or-v1-..." which IS the actual key. Each
|
||||
-- entry is { pat = "<lua pattern>", min_len = N (optional), max_len = N (opt),
|
||||
-- label = "<short tag for decoy names>" }.
|
||||
-- Lua patterns don't support {N} repeats; we use explicit repetition for fixed
|
||||
-- widths and a post-match length check for variable ones.
|
||||
-- Sixteen consecutive [A-Z0-9] classes. Lua patterns have no {N} quantifier,
-- so the fixed width is produced by repeating the class; string.rep keeps the
-- count readable instead of sixteen hand-copied classes.
-- (The former FOURTEEN_WORD constant was never referenced and is dropped.)
local SIXTEEN_UPPER = ("[A-Z0-9]"):rep(16)
|
||||
M.AUTODETECT_PATTERNS = {
  -- OpenRouter (long form). v%d+ catches v1, v2, ...
  { pat = "sk%-or%-v%d+%-[%w_-]+", min_len = 20, label = "openrouter" },

  -- GitHub tokens. Classic prefixes carry an alphanumeric body; min_len is
  -- measured on the whole match, prefix included.
  { pat = "ghp_[%w]+", min_len = 36, label = "ghp" },
  { pat = "gho_[%w]+", min_len = 36, label = "gho" },
  { pat = "ghs_[%w]+", min_len = 36, label = "ghs" },
  { pat = "ghu_[%w]+", min_len = 36, label = "ghu" },
  { pat = "ghr_[%w]+", min_len = 36, label = "ghr" },
  -- Fine-grained personal access tokens; the body itself contains "_".
  { pat = "github_pat_[%w_]+", min_len = 40, label = "github-pat" },

  -- AWS access keys: exactly AKIA + 16 chars [A-Z0-9].
  { pat = "AKIA" .. SIXTEEN_UPPER, label = "aws-key" },

  -- JWT: 3 base64url segments separated by dots; require eyJ prefix
  -- (decodes to `{"`) so we don't match arbitrary dotted slugs.
  { pat = "eyJ[%w_-]+%.[%w_-]+%.[%w_-]+", min_len = 30, label = "jwt" },

  -- Generic "sk-" keys (must come AFTER sk-or-* to avoid double match).
  -- The body class includes "-" and "_": with a plain %w body a key such as
  -- "sk-proj-..." matched only "sk-proj", failed min_len, and the whole key
  -- was left in the text. The %f[%w] frontier requires "sk" to begin a word,
  -- so hyphenated prose like "task-..." or "risk-..." is not swallowed.
  { pat = "%f[%w]sk%-[%w_-]+", min_len = 20, label = "openai" },

  -- SSH/GPG private key block. Only the header line is matched (a greedy
  -- match across newlines isn't easy in Lua patterns); policy decides
  -- whether the whole file gets redacted.
  { pat = "%-%-%-%-%-BEGIN[^\n]-PRIVATE KEY%-%-%-%-%-",
    label = "private-key-hdr" },
}
|
||||
|
||||
-- ---------------------------------------------------------------- load(path)
|
||||
-- Load the vault file at `path`.
--
-- Returns (vault, err). On success `vault` is
--   { entries = { {name=, value=}, ... }, skipped = N }
-- and `err` is nil; on failure `vault` is nil and `err` is a message.
--
-- The file must evaluate to a list whose items are either {name=, value=}
-- tables or bare strings. Bare strings are named "LITERAL_<index>"; tables
-- without a name are named "ENTRY_<index>". Items of any other shape are not
-- loaded; `skipped` counts them so the caller can report a malformed vault
-- instead of silently running with fewer secrets. The list is walked with
-- ipairs, so anything after a nil hole is not seen at all.
--
-- The file is refused unless its permission bits are exactly 600.

-- Quote `s` as a single POSIX-shell word. Inside single quotes the shell
-- expands nothing, so the only character needing care is the quote itself.
-- (string.format's %q produces a *Lua* literal: it leaves `$(...)` and
-- backticks intact inside double quotes, where the shell would run them.)
local function _shell_quote(s)
  return "'" .. (s:gsub("'", "'\\''")) .. "'"
end

-- Return the octal permission string of `path` (e.g. "600"), or nil when it
-- cannot be determined. Tries the GNU/BusyBox spelling first, then BSD/macOS.
local function _file_mode(path)
  local quoted = _shell_quote(path)
  local commands = {
    "stat -c %a -- " .. quoted .. " 2>/dev/null",
    "stat -f %Lp -- " .. quoted .. " 2>/dev/null",
  }
  for _, cmd in ipairs(commands) do
    -- io.popen raises on platforms that do not provide it.
    local ok, sh = pcall(io.popen, cmd)
    if ok and sh then
      local line = sh:read("*l")
      sh:close()
      -- Accept only a bare octal number; anything else is a failed stat.
      if line and line:match("^[0-7]+$") then
        return line
      end
    end
  end
  return nil
end

function M.load(path)
  if type(path) ~= "string" or path == "" then
    return nil, "secrets: vault path must be a non-empty string"
  end

  local f = io.open(path, "r")
  if not f then
    return nil, ("secrets: %s: not found"):format(path)
  end
  f:close()

  local mode = _file_mode(path)
  if not mode then
    return nil, ("secrets: %s: cannot stat"):format(path)
  end
  if mode ~= "600" then
    return nil, ("secrets: %s: refusing to load (mode %s, want 600 — chmod 600)"):format(path, mode)
  end

  -- NOTE(security): the vault is executed as Lua code, so whoever can write
  -- this file can run code in this process; the 600 check above is the only
  -- barrier, and the file could still be swapped between the check and the
  -- load. Confirm this trust model is intended before wiring it in.
  local ok, payload = pcall(dofile, path)
  if not ok then
    return nil, ("secrets: %s: load failed: %s"):format(path, tostring(payload))
  end
  if type(payload) ~= "table" then
    return nil, ("secrets: %s: must return a list, got %s"):format(path, type(payload))
  end

  local entries, skipped = {}, 0
  for i, e in ipairs(payload) do
    if type(e) == "string" then
      entries[#entries + 1] = {
        name = ("LITERAL_%d"):format(i),
        value = e,
      }
    elseif type(e) == "table" and type(e.value) == "string" then
      entries[#entries + 1] = {
        name = e.name or ("ENTRY_%d"):format(i),
        value = e.value,
      }
    else
      skipped = skipped + 1
    end
  end
  return { entries = entries, skipped = skipped }
end
|
||||
|
||||
-- ---------------------------------------------------------------- session
|
||||
-- Prototype for per-conversation state; instances find their methods here.
local Session = {}
Session.__index = Session

-- Build a fresh session.
--   vault : table with an `entries` list, or nil for an empty vault
--   opts  : optional table; `autodetect_patterns` overrides the module default
function M.make_session(vault, opts)
  local patterns = (opts or {}).autodetect_patterns or M.AUTODETECT_PATTERNS
  local state = {
    entries = (vault and vault.entries) or {},
    -- [value] -> token previously substituted for that value
    mapping_by_value = {},
    -- [placeholder] -> value (consulted by rehydrate)
    mapping_by_placeholder = {},
    -- number of tokens handed out so far
    counter = 0,
    autodetect_patterns = patterns,
  }
  return setmetatable(state, Session)
end
|
||||
|
||||
-- True when the length of `s` lies within the optional bounds of pattern
-- entry `p` (`min_len` and `max_len`, both inclusive; a missing bound is
-- not checked).
local function _meets_length(s, p)
  local n, lo, hi = #s, p.min_len, p.max_len
  return not ((lo and n < lo) or (hi and n > hi))
end
|
||||
|
||||
-- Return the substitute token for `value`, allocating one on first use.
--
--   value   : the secret text being replaced
--   stealth : truthy -> opaque decoy "xxxxxx-fake-<label>-NNN-xxxxxx" that is
--             never recorded for rehydration; falsy -> "$AISH_SECRET_NNN",
--             recorded in mapping_by_placeholder
--   label   : short tag embedded in decoys (defaults to "secret")
--
-- Tokens are stable per value *and per kind*. Decoys and placeholders are
-- cached separately because the mode is chosen per scrub call: with a single
-- cache, a value first seen in stealth mode would hand its decoy to a later
-- vault-mode call (which then can never be rehydrated), and a value first
-- seen in vault mode would hand a rehydratable placeholder to a stealth call.
-- Both kinds draw their number from the same counter.
function Session:_placeholder_for(value, stealth, label)
  if stealth then
    -- Created on first use so the session constructor need not know about it.
    local decoys = self.decoy_by_value
    if not decoys then
      decoys = {}
      self.decoy_by_value = decoys
    end
    local decoy = decoys[value]
    if not decoy then
      self.counter = self.counter + 1
      decoy = ("xxxxxx-fake-%s-%03d-xxxxxx"):format(tostring(label or "secret"), self.counter)
      decoys[value] = decoy
    end
    return decoy
  end

  local placeholder = self.mapping_by_value[value]
  if not placeholder then
    self.counter = self.counter + 1
    placeholder = ("$AISH_SECRET_%03d"):format(self.counter)
    self.mapping_by_value[value] = placeholder
    -- Only non-stealth placeholders go into the rehydration map.
    self.mapping_by_placeholder[placeholder] = value
  end
  return placeholder
end
|
||||
|
||||
-- Split every still-unclaimed piece around the matches that `find_next`
-- reports. `find_next(raw, from)` returns the start and end index of the next
-- candidate in `raw` at or after `from`, or nil. `claim(match)` returns a new
-- piece table to take the match's place, or nil to leave it as ordinary text.
-- Claimed pieces are passed through untouched, so text that has already been
-- substituted is never scanned again. Empty matches are ignored.
local function _claim_matches(pieces, find_next, claim)
  local out = {}
  for _, piece in ipairs(pieces) do
    if piece.claimed then
      out[#out + 1] = piece
    else
      local raw = piece.text
      local keep_from, from = 1, 1
      while from <= #raw do
        local s, e = find_next(raw, from)
        if not s then break end
        if e < s then
          -- Zero-width match: step over one character to guarantee progress.
          from = s + 1
        else
          local replacement = claim(raw:sub(s, e))
          if replacement then
            if s > keep_from then
              out[#out + 1] = { text = raw:sub(keep_from, s - 1) }
            end
            replacement.claimed = true
            out[#out + 1] = replacement
            keep_from = e + 1
          end
          from = e + 1
        end
      end
      if keep_from <= #raw then
        out[#out + 1] = { text = raw:sub(keep_from) }
      end
    end
  end
  return out
end

-- Replace secrets in `text` and return the scrubbed string.
--
--   text : string to scrub; nil or "" yields ""
--   mode : "off"              -> text returned unchanged
--          "vault" (default)  -> vault literals only
--          "vault+autodetect" -> vault literals, then autodetect patterns
--          "stealth"          -> same matching as above, but decoys
--          Any other value behaves like "vault".
--
-- Vault values are matched as plain text, never as Lua patterns. The text is
-- kept as a list of pieces, and a piece that has been substituted is never
-- searched again, so a later literal or pattern cannot match inside a token
-- and corrupt it. Placeholders this session already issued are protected the
-- same way, which keeps re-scrubbing already-scrubbed text idempotent.
--
-- Longer vault values are matched before shorter ones, so a short value that
-- is a substring of a longer one cannot cut the longer secret and leak its
-- remainder. Placeholder numbers are still allocated in the vault's list
-- order, followed by autodetect hits in pattern order, then text order.
function Session:scrub(text, mode)
  if not text or text == "" then return text or "" end
  mode = mode or "vault"
  if mode == "off" then return text end
  local stealth = (mode == "stealth")
  local use_autodetect = (mode == "vault+autodetect" or mode == "stealth")

  local pieces = { { text = text } }

  -- 1. Shield placeholders that this session issued earlier.
  local known = self.mapping_by_placeholder
  pieces = _claim_matches(pieces,
    function(raw, from) return raw:find("%$AISH_SECRET_%d%d%d+", from) end,
    function(m)
      if known[m] then return { text = m } end
    end)

  -- 2. Vault literals: collect usable values in list order, match longest first.
  local in_list_order = {}
  for idx, e in ipairs(self.entries) do
    local v = e.value
    if type(v) == "number" then v = tostring(v) end
    if type(v) == "string" and v ~= "" then
      in_list_order[#in_list_order + 1] = { idx = idx, value = v, label = e.name }
    end
  end
  local longest_first = {}
  for i, cand in ipairs(in_list_order) do longest_first[i] = cand end
  table.sort(longest_first, function(a, b)
    if #a.value ~= #b.value then return #a.value > #b.value end
    return a.idx < b.idx
  end)

  local seen = {}
  for _, cand in ipairs(longest_first) do
    pieces = _claim_matches(pieces,
      function(raw, from) return raw:find(cand.value, from, true) end,
      function()
        seen[cand.idx] = true
        -- The token is resolved at render time; see the allocation pass below.
        return { value = cand.value, label = cand.label }
      end)
  end

  -- Allocate tokens for the values that occurred, in the user's list order,
  -- so numbering does not depend on value length.
  for _, cand in ipairs(in_list_order) do
    if seen[cand.idx] then
      self:_placeholder_for(cand.value, stealth, cand.label)
    end
  end

  -- 3. Autodetect heuristics (Lua patterns), in AUTODETECT_PATTERNS order.
  --    The whole match is substituted when it passes the length bounds.
  if use_autodetect then
    for _, p in ipairs(self.autodetect_patterns) do
      pieces = _claim_matches(pieces,
        function(raw, from) return raw:find(p.pat, from) end,
        function(m)
          if _meets_length(m, p) then
            return { text = self:_placeholder_for(m, stealth, p.label) }
          end
        end)
    end
  end

  -- 4. Render the pieces back into one string.
  local out = {}
  for i, piece in ipairs(pieces) do
    out[i] = piece.text or self:_placeholder_for(piece.value, stealth, piece.label)
  end
  return table.concat(out)
end
|
||||
|
||||
-- Replace every known "$AISH_SECRET_<digits>" placeholder in `text` with the
-- value it stands for. nil or "" yields "". Placeholders are found as plain
-- substrings, so surrounding quotes, backticks or trailing punctuation do not
-- get in the way. Unknown placeholders are left exactly as written.
--
-- Slot numbers are formatted with %03d, which yields four or more digits once
-- a session goes past slot 999. Matching exactly three digits would then
-- restore "$AISH_SECRET_1000" as slot 100's secret followed by "0". So the
-- whole digit run is captured and the longest prefix that names a known slot
-- wins; leftover digits are ordinary text and are kept after the value.
function Session:rehydrate(text)
  if not text or text == "" then return text or "" end
  local known = self.mapping_by_placeholder
  return (text:gsub("%$AISH_SECRET_(%d%d%d+)", function(digits)
    for len = #digits, 3, -1 do
      local value = known["$AISH_SECRET_" .. digits:sub(1, len)]
      if value then
        return value .. digits:sub(len + 1)
      end
    end
    -- Returning nil makes gsub keep the original match.
    return nil
  end))
end
|
||||
|
||||
-- Introspection helpers for the :secrets meta.
|
||||
-- Number of substitute tokens this session has handed out so far.
function Session:mapping_size()
  local issued = self.counter
  return issued
end
|
||||
-- True when the session was created with at least one vault entry.
function Session:has_vault()
  local entry_count = #self.entries
  return entry_count > 0
end
|
||||
-- Names of the vault entries, in vault order, as a new list. Values are not
-- included.
function Session:vault_names()
  local names = {}
  for _, entry in ipairs(self.entries) do
    names[#names + 1] = entry.name
  end
  return names
end
|
||||
|
||||
-- ---------------------------------------------------------------- streaming rehydrator
|
||||
-- Streaming wrapper around session:rehydrate. A streamed delta can end in the
-- middle of a placeholder ("$AISH_SE" in one chunk, "CRET_001" in the next),
-- so the wrapper keeps the unfinished tail in a buffer between pushes and
-- emits everything that precedes it.
local Stream = {}
Stream.__index = Stream

-- Create a rehydrator bound to `session`, starting with an empty tail buffer.
function M.streaming_rehydrator(session)
  local stream = {
    session = session,
    tail = "",
  }
  return setmetatable(stream, Stream)
end
|
||||
|
||||
local PLACEHOLDER_PREFIX = "$AISH_SECRET_"

-- Decide whether `s` (the text from the last "$" to the end of the buffer)
-- could still grow into a placeholder. `slot_digits` is the number of digits
-- a complete slot number may have.
--
-- `s` qualifies only if it is a true prefix of "$AISH_SECRET_", or the full
-- prefix followed by fewer digits than a complete slot number. Ordinary
-- dollar text such as "$5" or "$SET" therefore passes straight through.
local function _is_partial_placeholder(s, slot_digits)
  local plen = #PLACEHOLDER_PREFIX
  if #s <= plen then
    return PLACEHOLDER_PREFIX:sub(1, #s) == s
  end
  if s:sub(1, plen) ~= PLACEHOLDER_PREFIX then
    return false
  end
  local digits = s:sub(plen + 1)
  return digits:match("^%d+$") ~= nil and #digits < slot_digits
end

-- Feed the next streamed chunk and return the text that is safe to emit.
--
--   chunk : string (nil is treated as "")
--
-- The unfinished placeholder at the end of the buffer, if any, is split off
-- *before* rehydration and kept in self.tail for the next push or flush. Only
-- the part before it is rehydrated, so restored secret values (which may
-- contain "$") are never inspected or rehydrated a second time.
function Stream:push(chunk)
  local combined = self.tail .. (chunk or "")

  -- Slot numbers have three digits, or more once the session has issued more
  -- than 999 tokens; a digit run shorter than that may still be incomplete.
  local session = self.session
  local issued = type(session.mapping_size) == "function" and session:mapping_size() or 0
  local slot_digits = math.max(3, #(("%d"):format(issued)))

  local head, held = combined, ""
  -- Position of the last "$": a "$" followed only by non-"$" up to the end.
  local start = combined:find("%$[^%$]*$")
  if start then
    local candidate = combined:sub(start)
    if _is_partial_placeholder(candidate, slot_digits) then
      head, held = combined:sub(1, start - 1), candidate
    end
  end

  self.tail = held
  return session:rehydrate(head)
end
|
||||
|
||||
-- End of stream: hand back whatever is still buffered and reset the buffer.
-- The buffered text goes through rehydrate once more, since it may be a
-- complete placeholder that was held only because no further chunk followed.
function Stream:flush()
  local session, pending = self.session, self.tail
  self.tail = ""
  return session:rehydrate(pending)
end
|
||||
|
||||
-- Export the module table.
return M
|
||||
Reference in New Issue
Block a user