e4b818b0e9
Standalone module — no wiring yet. Lands the substrate for issue #13: secrets.load(path) — vault file loader; refuses non-0600 secrets.make_session(vault) — per-conversation scrub/rehydrate state session:scrub(text, mode) — substitute literals (+ autodetect) session:rehydrate(text) — restore placeholders secrets.streaming_rehydrator — chunk-boundary-tolerant streaming wrapper Mode semantics (chosen per call by the caller): "off" — identity, no mapping "vault" — vault literals only, placeholders, rehydratable "vault+autodetect" — + heuristic regexes, placeholders, rehydratable "stealth" — + heuristic regexes, opaque decoys, one-way Placeholders are stable across the session: the same literal always maps to the same $AISH_SECRET_NNN slot, so re-scrubbing the same context is idempotent and the model sees a consistent vocabulary. AUTODETECT_PATTERNS (ordered; longer prefixes first): sk-or-v<N>-... OpenRouter ghp_/gho_/ghs_ GitHub PATs AKIA<16> AWS access keys eyJ...x.y.z JWTs sk-... OpenAI (generic; matched after openrouter) -----BEGIN ... PRIVATE KEY----- SSH/GPG key headers Streaming rehydrator: tolerates a placeholder split across SSE chunks ($AISH_SE then CRET_001). It holds back the trailing partial-match in a buffer, emits the rest, and resolves on the next push or flush. Verified with 20 unit cases (vault sub, stable mapping, autodetect across all label kinds, stealth decoys, mode=off, streaming with mid-placeholder splits, non-placeholder $-prose pass-through). Vault file mode enforcement: 0600 only — matches ssh's behavior for ~/.ssh/id_rsa. Loud failure (status + skip) if mode is wider. Next commit (issue #13 follow-up): wire into broker / tool dispatch / display, add per-broker `redact` policy, :secrets meta, config example block. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
251 lines
10 KiB
Lua
251 lines
10 KiB
Lua
-- secrets.lua — vault + scrub/rehydrate for issue #13.
|
|
--
|
|
-- Pipeline:
|
|
-- 1. M.load(path) reads the user's vault. Refuses to load if the file
|
|
-- isn't mode 0600 (matches ssh's behavior for ~/.ssh/id_rsa).
|
|
-- 2. M.make_session(vault, opts) returns a per-conversation state object.
|
|
-- session:scrub(text, mode) substitutes secrets with stable placeholders
|
|
-- ($AISH_SECRET_001, _002, ...) and records the mapping. session:rehydrate
|
|
-- reverses it. The mapping is stable across the conversation, so the same
|
|
-- literal value always maps to the same placeholder slot.
|
|
-- 3. M.streaming_rehydrator(session) wraps the per-delta rehydration so a
|
|
-- placeholder split across SSE chunks doesn't render half-substituted.
|
|
--
|
|
-- Modes (per call to session:scrub):
|
|
-- "off" → identity (returns text unchanged, no mapping)
|
|
-- "vault" → vault literals only, placeholders, rehydratable
|
|
-- "vault+autodetect" → + heuristic regexes, placeholders, rehydratable
|
|
-- "stealth" → + heuristic regexes, opaque decoys, NOT rehydratable
|
|
-- (one-way scrub for zero-info brokers — user and
|
|
-- model both see decoys; real values only in the
|
|
-- executor stream which is pre-scrub)
|
|
|
|
-- Module table: the public API is attached to it below and it is returned at
-- the end of the file.
local M = {}
|
|
|
|
-- ---------------------------------------------------------------- AUTODETECT_PATTERNS
-- Heuristic secret detectors, applied in list order by Session:scrub.
--
-- Order matters: longer / more-specific prefixes must come first so a generic
-- "sk-..." rule doesn't shadow "sk-or-v1-..." which IS the actual key.
--
-- Each entry is a table with:
--   pat      Lua pattern (not a regex) run against the text
--   min_len  optional minimum length of the WHOLE match, prefix included
--   max_len  optional maximum length of the whole match
--   label    short tag used when building stealth decoy names
--
-- Lua patterns have no {N} repeat operator, so fixed widths are produced by
-- repeating the character class and variable widths rely on the post-match
-- min_len / max_len check.
local SIXTEEN_UPPER = ("[A-Z0-9]"):rep(16)

M.AUTODETECT_PATTERNS = {
    -- OpenRouter (long form). v%d+ catches v1, v2, ...
    { pat = "sk%-or%-v%d+%-[%w_-]+", min_len = 20, label = "openrouter" },

    -- GitHub tokens (ghp_ / gho_ / ghs_). min_len counts the 4-char prefix.
    { pat = "ghp_[%w]+", min_len = 36, label = "ghp" },
    { pat = "gho_[%w]+", min_len = 36, label = "gho" },
    { pat = "ghs_[%w]+", min_len = 36, label = "ghs" },

    -- AWS access keys: AKIA + 16 chars of [A-Z0-9].
    -- NOTE: the pattern is not anchored, so inside a longer run of
    -- uppercase/digits only the first 20 characters are matched.
    { pat = "AKIA" .. SIXTEEN_UPPER, label = "aws-key" },

    -- JWT: 3 base64url segments separated by dots; require the eyJ prefix
    -- (decodes to `{"`) so we don't match arbitrary dotted slugs.
    { pat = "eyJ[%w_-]+%.[%w_-]+%.[%w_-]+", min_len = 30, label = "jwt" },

    -- OpenAI generic (must come AFTER sk-or-* to avoid a double match).
    { pat = "sk%-[%w]+", min_len = 20, label = "openai" },

    -- SSH/GPG private key block. Matching greedily across newlines isn't
    -- easy in Lua patterns, so only the header line is matched and policy
    -- decides whether to redact the whole file.
    { pat = "%-%-%-%-%-BEGIN[^\n]-PRIVATE KEY%-%-%-%-%-",
      label = "private-key-hdr" },
}
|
|
|
|
-- ---------------------------------------------------------------- load(path)
|
|
-- Load the vault file at `path`.
--
-- Returns (vault) on success or (nil, err) on failure, where vault is
-- { entries = { { name = ..., value = <string> }, ... } }.
--
-- The file is executed as a Lua chunk (dofile) and must return a list whose
-- items are either { name =, value = } tables or bare strings (per the issue
-- body). Bare strings are named "LITERAL_<index>" and tables without a name
-- "ENTRY_<index>"; a name is never derived from the secret value itself.
-- Items of any other shape are skipped.
--
-- The file must be mode 0600 (matches ssh's behavior for ~/.ssh/id_rsa);
-- anything else is refused before the file is executed.

-- Quote `s` as a single POSIX-shell word. Single quotes disable every
-- expansion ($VAR, $(...), backticks); only ' itself needs splicing.
local function _shell_quote(s)
    return "'" .. (s:gsub("'", "'\\''")) .. "'"
end

-- Return the octal permission string of `path` (e.g. "600"), or nil when no
-- stat(1) flavour produced a numeric answer.
local function _file_mode(path)
    local quoted = _shell_quote(path)
    -- GNU coreutils / BusyBox spelling first, then the BSD / macOS one.
    -- `--` keeps a path that starts with "-" from being read as an option.
    local commands = {
        "stat -c %a -- " .. quoted .. " 2>/dev/null",
        "stat -f %Lp -- " .. quoted .. " 2>/dev/null",
    }
    for _, cmd in ipairs(commands) do
        local sh = io.popen(cmd)
        if sh then
            local line = sh:read("*l")
            sh:close()
            -- Accept purely numeric output only, so the wrong stat flavour
            -- printing something else can never be mistaken for a mode.
            if line and line:match("^%d+$") then
                return line
            end
        end
    end
    return nil
end

function M.load(path)
    -- Existence / readability probe; the handle itself is not needed.
    local f = io.open(path, "r")
    if not f then
        return nil, ("secrets: %s: not found"):format(path)
    end
    f:close()

    -- Mode check: refuse to load if not exactly 0600.
    local mode = _file_mode(path)
    if not mode then
        return nil, ("secrets: %s: cannot stat"):format(path)
    end
    if mode ~= "600" then
        return nil, ("secrets: %s: refusing to load (mode %s, want 600 — chmod 600)"):format(path, mode)
    end

    -- NOTE(review): dofile runs the vault as Lua code. The mode check above
    -- does not verify file ownership, and the file could change between the
    -- check and this call — confirm that is acceptable for the threat model.
    local ok, payload = pcall(dofile, path)
    if not ok then
        return nil, ("secrets: %s: load failed: %s"):format(path, tostring(payload))
    end
    if type(payload) ~= "table" then
        return nil, ("secrets: %s: must return a list, got %s"):format(path, type(payload))
    end

    -- Normalize both accepted item shapes to { name =, value = }.
    local entries = {}
    for i, e in ipairs(payload) do
        if type(e) == "string" then
            entries[#entries + 1] = {
                name = ("LITERAL_%d"):format(i),
                value = e,
            }
        elseif type(e) == "table" and type(e.value) == "string" then
            entries[#entries + 1] = {
                name = e.name or ("ENTRY_%d"):format(i),
                value = e.value,
            }
        end
    end
    return { entries = entries }
end
|
|
|
|
-- ---------------------------------------------------------------- session
|
|
-- Prototype table for per-conversation scrub/rehydrate state; instances
-- resolve their methods through __index.
local Session = {}
Session.__index = Session

-- Build a fresh session.
--   vault  table with an `entries` list (as returned by M.load), or nil
--   opts   optional table; opts.autodetect_patterns overrides
--          M.AUTODETECT_PATTERNS for this session
function M.make_session(vault, opts)
    local patterns = opts and opts.autodetect_patterns
    if not patterns then
        patterns = M.AUTODETECT_PATTERNS
    end

    local entries = vault and vault.entries
    if not entries then
        entries = {}
    end

    local state = {
        entries = entries,
        mapping_by_value = {},        -- [value] -> placeholder|decoy
        mapping_by_placeholder = {},  -- [placeholder] -> value (for rehydrate)
        counter = 0,
        autodetect_patterns = patterns,
    }
    return setmetatable(state, Session)
end
|
|
|
|
-- True when string `s` satisfies the optional length bounds (min_len,
-- max_len) carried by pattern entry `p`; a missing bound is not enforced.
local function _meets_length(s, p)
    local lo, hi = p.min_len, p.max_len
    if lo and #s < lo then
        return false
    end
    return not (hi and #s > hi)
end
|
|
|
|
-- Return the substitute text for `value`, allocating one on first use. The
-- result is stable for the lifetime of the session.
--
--   value    the secret literal being replaced
--   stealth  truthy -> opaque decoy "xxxxxx-fake-<label>-NNN-xxxxxx" that is
--            NOT recorded for rehydration; falsy -> "$AISH_SECRET_NNN",
--            recorded in mapping_by_placeholder
--   label    tag embedded in decoys (defaults to "secret")
--
-- Decoys and placeholders are cached separately. scrub() picks the mode per
-- call, so with one shared cache a value first seen in "vault" mode would be
-- handed to a later "stealth" call as a rehydratable placeholder, and a value
-- first seen in "stealth" mode would be handed to later placeholder-mode
-- calls as a decoy that rehydrate() cannot restore.
function Session:_placeholder_for(value, stealth, label)
    if stealth then
        -- Created lazily so sessions built elsewhere keep working.
        local decoys = self.decoy_by_value
        if not decoys then
            decoys = {}
            self.decoy_by_value = decoys
        end
        local known = decoys[value]
        if known then return known end

        self.counter = self.counter + 1
        -- Opaque decoy keyed off the label (so distinct kinds look distinct
        -- to anyone reading along, without revealing the actual value).
        local decoy = ("xxxxxx-fake-%s-%03d-xxxxxx"):format(
            tostring(label or "secret"), self.counter)
        decoys[value] = decoy
        return decoy
    end

    local known = self.mapping_by_value[value]
    if known then return known end

    self.counter = self.counter + 1
    local placeholder = ("$AISH_SECRET_%03d"):format(self.counter)
    -- Only non-stealth placeholders go into the rehydration map.
    self.mapping_by_placeholder[placeholder] = value
    self.mapping_by_value[value] = placeholder
    return placeholder
end
|
|
|
|
-- Replace every plain-text occurrence of `needle` in the not-yet-scrubbed
-- pieces with `replacement`. `pieces` is a list of { text =, scrubbed = }
-- tables; pieces already marked scrubbed are passed through untouched so a
-- later literal can never match inside an inserted placeholder.
local function _substitute_literal(pieces, needle, replacement)
    local out = {}
    for _, piece in ipairs(pieces) do
        if piece.scrubbed then
            out[#out + 1] = piece
        else
            local s, from = piece.text, 1
            while true do
                -- Plain find: vault values are literals, not Lua patterns.
                local a, b = s:find(needle, from, true)
                if not a then break end
                if a > from then
                    out[#out + 1] = { text = s:sub(from, a - 1) }
                end
                out[#out + 1] = { text = replacement, scrubbed = true }
                from = b + 1
            end
            if from <= #s then
                out[#out + 1] = { text = s:sub(from) }
            end
        end
    end
    return out
end

-- Substitute all vault literals + (in autodetect/stealth modes) all
-- autodetect pattern matches. Returns the scrubbed string.
--
--   text  string to scrub; nil/false/"" yields "" (or the empty input)
--   mode  "off" | "vault" (default) | "vault+autodetect" | "stealth";
--         any other value behaves like "vault"
--
-- Vault literals are allocated slots in the user's list order, but are
-- substituted longest-first: otherwise a short secret listed before a longer
-- one that contains it (a password before the connection string embedding
-- it) would prevent the longer one from matching and leak its remainder.
function Session:scrub(text, mode)
    if not text or text == "" then return text or "" end
    mode = mode or "vault"
    if mode == "off" then return text end
    local stealth = (mode == "stealth")
    local use_autodetect = (mode == "vault+autodetect" or mode == "stealth")

    -- Pass 1: find which vault values occur and allocate their slots in
    -- list order, keeping numbering deterministic by the user's ordering.
    local hits = {}
    for idx, e in ipairs(self.entries) do
        local v = e.value
        if v ~= "" and text:find(v, 1, true) then
            hits[#hits + 1] = {
                value = v,
                order = idx,
                replacement = self:_placeholder_for(v, stealth, e.name),
            }
        end
    end

    -- Pass 2: substitute, longest value first (list order breaks ties).
    if #hits > 0 then
        table.sort(hits, function(a, b)
            if #a.value ~= #b.value then
                return #a.value > #b.value
            end
            return a.order < b.order
        end)
        local pieces = { { text = text } }
        for _, hit in ipairs(hits) do
            pieces = _substitute_literal(pieces, hit.value, hit.replacement)
        end
        local buf = {}
        for i, piece in ipairs(pieces) do
            buf[i] = piece.text
        end
        text = table.concat(buf)
    end

    -- Autodetect heuristics (Lua patterns), in the configured order.
    if use_autodetect then
        for _, p in ipairs(self.autodetect_patterns) do
            text = text:gsub(p.pat, function(m)
                if _meets_length(m, p) then
                    return self:_placeholder_for(m, stealth, p.label)
                end
                return m
            end)
        end
    end

    return text
end
|
|
|
|
-- Reverse the placeholder substitution. The match is unanchored, so it is
-- tolerant to trailing punctuation and surrounding quotes/backticks
-- (gotcha 1 in the issue body). Unknown placeholders are left as-is.
--
-- Placeholders are written with "%03d", so slot numbers above 999 have four
-- or more digits. The whole digit run is captured and the LONGEST known slot
-- that prefixes it is restored; leftover digits are kept as literal text.
-- This makes "$AISH_SECRET_1000" resolve to slot 1000 rather than to slot 100
-- followed by "0", while "$AISH_SECRET_0012" still resolves to slot 001
-- followed by "2".
function Session:rehydrate(text)
    if not text or text == "" then return text or "" end
    local known = self.mapping_by_placeholder
    return (text:gsub("%$AISH_SECRET_(%d%d%d%d*)", function(digits)
        for len = #digits, 3, -1 do
            local value = known["$AISH_SECRET_" .. digits:sub(1, len)]
            if value then
                return value .. digits:sub(len + 1)
            end
        end
        -- Returning nil makes gsub keep the original match.
        return nil
    end))
end
|
|
|
|
-- Introspection helpers for the :secrets meta.

-- Number of substitute slots handed out so far in this session.
function Session:mapping_size()
    local allocated = self.counter
    return allocated
end
|
|
-- True when the session was created with at least one vault entry.
function Session:has_vault()
    local entry_count = #self.entries
    return entry_count > 0
end
|
|
-- List the names of the vault entries, in vault order. Only names are
-- returned, never values.
function Session:vault_names()
    local names = {}
    for _, entry in ipairs(self.entries) do
        table.insert(names, entry.name)
    end
    return names
end
|
|
|
|
-- ---------------------------------------------------------------- streaming rehydrator
|
|
-- Streamed assistant deltas may split a placeholder across chunks
|
|
-- ($AISH_SE then CRET_001). Buffer just enough to recognize an
|
|
-- incomplete placeholder match at the tail; emit everything before
|
|
-- the last `$` that could be the start of a partial placeholder.
|
|
-- Prototype table for the streaming wrapper; instances resolve their methods
-- through __index.
local Stream = {}
Stream.__index = Stream

-- Wrap `session` in a chunk-by-chunk rehydrator. `tail` holds text withheld
-- from the previous push because it might be the start of a placeholder.
function M.streaming_rehydrator(session)
    local stream = { session = session, tail = "" }
    return setmetatable(stream, Stream)
end
|
|
|
|
-- Feed the next streamed chunk (nil is treated as "") and return the text
-- that is safe to emit now. Complete placeholders are rehydrated; a trailing
-- fragment that could still grow into a placeholder is withheld in
-- self.tail until the next push or flush.
function Stream:push(chunk)
    -- Prepend what was withheld last time, then resolve complete placeholders.
    local text = self.session:rehydrate(self.tail .. (chunk or ""))

    -- Greedy ".*" backs off to the LAST "$"; "()" captures its position.
    local dollar_at = text:match(".*()%$")
    if dollar_at then
        local candidate = text:sub(dollar_at)
        -- "$" followed only by characters that may lead into
        -- "AISH_SECRET_NNN", running to the end of the text.
        if candidate:match("^%$A?I?S?H?_?S?E?C?R?E?T?_?%d?%d?%d?$") then
            self.tail = candidate
            return text:sub(1, dollar_at - 1)
        end
    end

    -- Nothing to withhold: emit everything.
    self.tail = ""
    return text
end
|
|
|
|
-- End of stream: hand back whatever is still withheld and reset the buffer.
-- The withheld text gets one final rehydrate pass in case it is a complete
-- placeholder that was held only because no further chunk arrived.
function Stream:flush()
    local pending
    pending, self.tail = self.tail, ""
    return self.session:rehydrate(pending)
end
|
|
|
|
-- Export the module table.
return M
|