secrets: vault loader + scrub/rehydrate + autodetect (#13 commit 1)

Standalone module — no wiring yet. Lands the substrate for issue #13:

  secrets.load(path)            — vault file loader; refuses non-0600
  secrets.make_session(vault)   — per-conversation scrub/rehydrate state
  session:scrub(text, mode)     — substitute literals (+ autodetect)
  session:rehydrate(text)       — restore placeholders
  secrets.streaming_rehydrator  — chunk-boundary-tolerant streaming wrapper

Mode semantics (chosen per call by the caller):
  "off"               — identity, no mapping
  "vault"             — vault literals only, placeholders, rehydratable
  "vault+autodetect"  — + heuristic regexes, placeholders, rehydratable
  "stealth"           — + heuristic regexes, opaque decoys, one-way

Placeholders are stable across the session: the same literal always
maps to the same $AISH_SECRET_NNN slot, so re-scrubbing the same
context is idempotent and the model sees a consistent vocabulary.

AUTODETECT_PATTERNS (ordered; longer prefixes first):
  sk-or-v<N>-...  OpenRouter
  ghp_/gho_/ghs_  GitHub tokens (personal access / OAuth / server-to-server)
  AKIA<16>        AWS access keys
  eyJ...x.y.z     JWTs
  sk-...          OpenAI (generic; matched after openrouter)
  -----BEGIN ... PRIVATE KEY-----  SSH/GPG key headers

Streaming rehydrator: tolerates a placeholder split across SSE chunks
($AISH_SE then CRET_001). It holds back the trailing partial-match
in a buffer, emits the rest, and resolves on the next push or flush.
Verified with 20 unit cases (vault sub, stable mapping, autodetect
across all label kinds, stealth decoys, mode=off, streaming with
mid-placeholder splits, non-placeholder $-prose pass-through).

Vault file mode enforcement: 0600 only — matches ssh's behavior for
~/.ssh/id_rsa. Loud failure (status + skip) if the mode is anything other than 0600.

Next commit (issue #13 follow-up): wire into broker / tool dispatch
/ display, add per-broker `redact` policy, :secrets meta, config
example block.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-16 21:36:39 +00:00
parent cdf4e86679
commit e4b818b0e9
+250
View File
@@ -0,0 +1,250 @@
-- secrets.lua — vault + scrub/rehydrate for issue #13.
--
-- Pipeline:
-- 1. M.load(path) reads the user's vault. Refuses to load if the file
-- isn't mode 0600 (matches ssh's behavior for ~/.ssh/id_rsa).
-- 2. M.make_session(vault, opts) returns a per-conversation state object.
-- session:scrub(text, mode) substitutes secrets with stable placeholders
-- ($AISH_SECRET_001, _002, ...) and records the mapping. session:rehydrate
-- reverses it. The mapping is stable across the conversation, so the same
-- literal value always maps to the same placeholder slot.
-- 3. M.streaming_rehydrator(session) wraps the per-delta rehydration so a
-- placeholder split across SSE chunks doesn't render half-substituted.
--
-- Modes (per call to session:scrub):
-- "off" → identity (returns text unchanged, no mapping)
-- "vault" → vault literals only, placeholders, rehydratable
-- "vault+autodetect" → + heuristic regexes, placeholders, rehydratable
-- "stealth" → + heuristic regexes, opaque decoys, NOT rehydratable
-- (one-way scrub for zero-info brokers — user and
-- model both see decoys; real values only in the
-- executor stream which is pre-scrub)
local M = {}

-- ---------------------------------------------------------------- AUTODETECT_PATTERNS
-- Heuristic secret detectors. session:scrub() applies them in list order in
-- the "vault+autodetect" and "stealth" modes.
--
-- Order matters: longer / more-specific prefixes must come first so the
-- generic "sk-..." rule doesn't shadow "sk-or-v1-...", which IS the actual
-- key. Each entry is
--   { pat = "<lua pattern>", min_len = N (optional), max_len = N (optional),
--     label = "<short tag for decoy names>" }
-- min_len / max_len are checked against the whole match, prefix included.
--
-- Lua patterns don't support {N} repeats; fixed widths are built with
-- string.rep and variable widths rely on the post-match length check.
local SIXTEEN_UPPER = ("[A-Z0-9]"):rep(16)

M.AUTODETECT_PATTERNS = {
  -- OpenRouter (long form). v%d+ catches v1, v2, ...
  { pat = "sk%-or%-v%d+%-[%w_-]+", min_len = 20, label = "openrouter" },
  -- GitHub tokens: ghp_ (personal access), gho_ (OAuth), ghs_ (server).
  { pat = "ghp_[%w]+", min_len = 36, label = "ghp" },
  { pat = "gho_[%w]+", min_len = 36, label = "gho" },
  { pat = "ghs_[%w]+", min_len = 36, label = "ghs" },
  -- AWS access keys: exactly AKIA + 16 chars [A-Z0-9].
  { pat = "AKIA" .. SIXTEEN_UPPER, label = "aws-key" },
  -- JWT: 3 base64url segments separated by dots; require the eyJ prefix
  -- (decodes to `{"`) so we don't match arbitrary dotted slugs.
  { pat = "eyJ[%w_-]+%.[%w_-]+%.[%w_-]+", min_len = 30, label = "jwt" },
  -- Generic "sk-" API keys (OpenAI style). Must come AFTER sk-or-* so
  -- OpenRouter keys keep their own label. The body accepts "-" and "_"
  -- because keys such as "sk-proj-..." contain them: with alphanumerics
  -- only, the match ends at the first inner hyphen, fails min_len, and the
  -- key goes out unscrubbed. %f[%w] requires "sk" to start a word so that
  -- hyphenated prose like "disk-usage-analyzer-tool" is not taken for a key.
  { pat = "%f[%w]sk%-[%w_-]+", min_len = 20, label = "openai" },
  -- SSH/GPG private key block (multi-line; match the header only, the caller
  -- can extend matching to include the body if needed). Greedy matching
  -- across newlines isn't easy in Lua patterns — we match just the header
  -- line and let policy decide to redact the whole file.
  { pat = "%-%-%-%-%-BEGIN[^\n]-PRIVATE KEY%-%-%-%-%-",
    label = "private-key-hdr" },
}
-- ---------------------------------------------------------------- load(path)
-- Quote `s` for a POSIX shell: wrap it in single quotes and splice every
-- embedded single quote as '\''. Nothing is expanded inside single quotes,
-- so `$`, backticks and `\` in a path stay literal.
local function _shell_quote(s)
  return "'" .. (s:gsub("'", "'\\''")) .. "'"
end

-- Load the user's vault file.
--
-- The file is a Lua chunk that returns a list. Each element is either
--   * a table with a string `value` (and an optional `name`), or
--   * a bare string.
-- Bare strings are named LITERAL_<index> and unnamed tables ENTRY_<index>;
-- a name is never derived from the secret value itself. Elements of any
-- other shape are skipped.
--
-- Returns (vault, nil) on success, where vault is
--   { entries = { {name=, value=}, ... } }
-- Returns (nil, err) when the file cannot be opened, cannot be stat'ed, has
-- a mode other than 600, fails to run, or does not return a table.
function M.load(path)
  local f = io.open(path, "r")
  if not f then
    return nil, ("secrets: %s: not found"):format(path)
  end
  f:close()

  -- Mode check: refuse to load if not 0600. `stat -c %a` is GNU coreutils;
  -- `stat -f %Lp` is the BSD/macOS spelling and is tried only when the GNU
  -- form fails. `--` keeps a path that starts with "-" from being parsed as
  -- an option.
  local quoted = _shell_quote(path)
  local sh = io.popen(
    ("stat -c %%a -- %s 2>/dev/null || stat -f %%Lp -- %s 2>/dev/null")
      :format(quoted, quoted))
  local mode = sh and sh:read("*l")
  if sh then sh:close() end
  if not mode then
    return nil, ("secrets: %s: cannot stat"):format(path)
  end
  if mode ~= "600" then
    return nil, ("secrets: %s: refusing to load (mode %s, want 600 — chmod 600)"):format(path, mode)
  end

  -- NOTE(review): the vault is executed as Lua code. That is acceptable only
  -- because it is the user's own file and has just passed the mode check;
  -- never point this loader at untrusted input.
  local ok, payload = pcall(dofile, path)
  if not ok then
    return nil, ("secrets: %s: load failed: %s"):format(path, tostring(payload))
  end
  if type(payload) ~= "table" then
    return nil, ("secrets: %s: must return a list, got %s"):format(path, type(payload))
  end

  -- Normalise both accepted element shapes to {name=, value=}.
  local entries = {}
  for i, e in ipairs(payload) do
    if type(e) == "string" then
      entries[#entries + 1] = {
        name = ("LITERAL_%d"):format(i),
        value = e,
      }
    elseif type(e) == "table" and type(e.value) == "string" then
      entries[#entries + 1] = {
        name = e.name or ("ENTRY_%d"):format(i),
        value = e.value,
      }
    end
  end
  return { entries = entries }
end
-- ---------------------------------------------------------------- session
-- Metatable shared by every session object; methods resolve via __index.
local Session = {}
Session.__index = Session

-- Create the per-conversation scrub/rehydrate state.
--   vault  table with an `entries` list (as returned by M.load), or nil
--   opts   optional table; opts.autodetect_patterns replaces the module's
--          default pattern list for this session
function M.make_session(vault, opts)
  local entries = {}
  if vault and vault.entries then
    entries = vault.entries
  end

  local patterns = (opts or {}).autodetect_patterns
  if not patterns then
    patterns = M.AUTODETECT_PATTERNS
  end

  local state = {
    entries = entries,
    -- [value] -> substitute already handed out for that value
    mapping_by_value = {},
    -- [placeholder] -> value (consulted by rehydrate)
    mapping_by_placeholder = {},
    -- number of substitutes allocated so far
    counter = 0,
    autodetect_patterns = patterns,
  }
  return setmetatable(state, Session)
end
-- True when the length of `s` lies within the optional bounds carried by
-- pattern entry `p` (p.min_len and p.max_len, both inclusive). A missing
-- bound is not checked.
local function _meets_length(s, p)
  local lo, hi = p.min_len, p.max_len
  return not ((lo and #s < lo) or (hi and #s > hi))
end
-- Allocate (or look up) the substitute for `value`, stable across calls in
-- this session.
--   value    the literal secret
--   stealth  true  -> opaque decoy, never added to the rehydration map
--            false -> $AISH_SECRET_NNN, recorded for rehydrate()
--   label    short tag embedded in decoys (defaults to "secret")
-- Decoys and placeholders are remembered in separate tables. With a single
-- shared table, a value first scrubbed in stealth mode would hand its
-- non-rehydratable decoy to later non-stealth scrubs, and a value first
-- scrubbed in a rehydratable mode would hand its placeholder to stealth
-- scrubs, breaking the one-way guarantee. The counter stays shared, so the
-- number inside a decoy never collides with a placeholder number.
function Session:_placeholder_for(value, stealth, label)
  local by_value
  if stealth then
    -- make_session does not create this table, so create it on first use.
    self.decoy_by_value = self.decoy_by_value or {}
    by_value = self.decoy_by_value
  else
    by_value = self.mapping_by_value
  end

  local existing = by_value[value]
  if existing then return existing end

  self.counter = self.counter + 1
  local token
  if stealth then
    -- Opaque decoy keyed off the label (so distinct kinds look distinct
    -- to anyone reading along, without revealing the actual value).
    token = ("xxxxxx-fake-%s-%03d-xxxxxx"):format(label or "secret", self.counter)
  else
    token = ("$AISH_SECRET_%03d"):format(self.counter)
    -- Only non-stealth placeholders go into the rehydration map.
    self.mapping_by_placeholder[token] = value
  end
  by_value[value] = token
  return token
end
-- Replace every vault literal found in `text` with its session substitute,
-- in ONE left-to-right pass over the original text.
--
-- Replacing entry by entry on the progressively rewritten text has two
-- failure modes this pass avoids:
--   * if a shorter vault value is a prefix/substring of a longer one, the
--     longer secret is only partly replaced and its remainder leaks;
--   * a later vault value can match inside substitute text inserted for an
--     earlier one and corrupt it.
-- At each position the earliest match wins; among matches starting at the
-- same position the longest wins. Substitutes are allocated in vault order
-- for the entries that occur in `text`, so numbering follows the user's list.
-- Matching is plain-text: vault values are never treated as Lua patterns.
local function _scrub_vault_literals(session, text, stealth)
  -- Entries that occur at all, each with its next known occurrence (s, e).
  local live = {}
  for _, entry in ipairs(session.entries) do
    local v = entry.value
    if v ~= "" then
      local s, e = text:find(v, 1, true)
      if s then
        live[#live + 1] = {
          value = v, s = s, e = e,
          token = session:_placeholder_for(v, stealth, entry.name),
        }
      end
    end
  end
  if #live == 0 then return text end

  local out, pos = {}, 1
  while true do
    local best
    for _, c in ipairs(live) do
      -- The cached occurrence starts before the cursor: look for the next.
      if c.s and c.s < pos then
        c.s, c.e = text:find(c.value, pos, true)
      end
      if c.s and (not best or c.s < best.s
                  or (c.s == best.s and c.e > best.e)) then
        best = c
      end
    end
    if not best then break end
    out[#out + 1] = text:sub(pos, best.s - 1)
    out[#out + 1] = best.token
    pos = best.e + 1
  end
  out[#out + 1] = text:sub(pos)
  return table.concat(out)
end

-- Substitute all vault literals + (in autodetect/stealth modes) all
-- autodetect pattern matches. Returns the scrubbed string.
--   text  string to scrub; nil or "" yields ""
--   mode  "off" | "vault" (default) | "vault+autodetect" | "stealth"
--         Any other value behaves like "vault".
function Session:scrub(text, mode)
  if not text or text == "" then return text or "" end
  mode = mode or "vault"
  if mode == "off" then return text end
  local stealth = (mode == "stealth")
  local use_autodetect = (mode == "vault+autodetect" or stealth)

  -- Vault literals first, so autodetect only sees what the vault left over.
  text = _scrub_vault_literals(self, text, stealth)

  -- Autodetect heuristics (Lua patterns), in the session's pattern order.
  if use_autodetect then
    for _, p in ipairs(self.autodetect_patterns) do
      text = text:gsub(p.pat, function(m)
        if _meets_length(m, p) then
          return self:_placeholder_for(m, stealth, p.label)
        end
        return m
      end)
    end
  end
  return text
end
-- Reverse the placeholder substitution. Placeholders are matched wherever
-- they occur, so trailing punctuation and surrounding quotes/backticks
-- (gotcha 1 in the issue body) don't get in the way. Unknown placeholders
-- are left untouched. nil or "" yields "".
--
-- Placeholders are written with "%03d", which produces four or more digits
-- once the session counter passes 999. Capturing exactly three digits would
-- turn "$AISH_SECRET_1000" into the value of slot 100 followed by "0", so
-- the whole digit run is captured and the longest known slot wins. Digits
-- beyond the slot number are ordinary text and are kept.
function Session:rehydrate(text)
  if not text or text == "" then return text or "" end
  local known = self.mapping_by_placeholder
  return (text:gsub("%$AISH_SECRET_(%d%d%d+)", function(digits)
    for len = #digits, 3, -1 do
      local value = known["$AISH_SECRET_" .. digits:sub(1, len)]
      if value then
        return value .. digits:sub(len + 1)
      end
    end
    return nil -- no known slot: gsub keeps the original text
  end))
end
-- Introspection helpers for the :secrets meta.

-- Number of substitutes allocated so far in this session (the counter).
function Session:mapping_size()
  local allocated = self.counter
  return allocated
end
-- True when the session holds at least one vault entry.
function Session:has_vault()
  local entry_count = #self.entries
  return entry_count > 0
end
-- List the names of the session's vault entries, in vault order.
-- Returns a new table on every call.
function Session:vault_names()
  local names, count = {}, 0
  for _, entry in ipairs(self.entries) do
    local name = entry.name
    if name ~= nil then
      count = count + 1
      names[count] = name
    end
  end
  return names
end
-- ---------------------------------------------------------------- streaming rehydrator
-- Streamed assistant deltas may split a placeholder across chunks
-- ($AISH_SE then CRET_001). The stream object buffers just enough to
-- recognise an incomplete placeholder at the tail and emits everything
-- before the last `$` that could start one.
local Stream = {}
Stream.__index = Stream

-- Wrap `session` in a streaming rehydrator that starts with an empty tail.
function M.streaming_rehydrator(session)
  local stream = { tail = "", session = session }
  return setmetatable(stream, Stream)
end
-- Feed one streamed chunk and return the text that is safe to emit now.
--   chunk  next piece of streamed text (nil is treated as "")
-- Complete placeholders in (held tail + chunk) are rehydrated. If the text
-- then ends in something that could still grow into a placeholder, that
-- piece is held back in self.tail until the next push or flush.
--
-- "Could still grow into a placeholder" is a strict prefix test: the tail
-- must be a leading part of "$AISH_SECRET_", or that marker followed by one
-- to three digits. A looser all-letters-optional pattern also accepts "$5",
-- "$100" or "$SECRET", which needlessly withholds ordinary `$` prose at the
-- end of a chunk.
function Stream:push(chunk)
  local combined = self.session:rehydrate(self.tail .. (chunk or ""))

  -- Position of the last "$", if any (anchored so the scan is one pass).
  local last_dollar = combined:match("^.*()%$")
  if last_dollar then
    local maybe = combined:sub(last_dollar)
    local marker = "$AISH_SECRET_"
    local partial
    if #maybe <= #marker then
      partial = (marker:sub(1, #maybe) == maybe)
    else
      partial = maybe:sub(1, #marker) == marker
        and maybe:sub(#marker + 1):match("^%d%d?%d?$") ~= nil
    end
    if partial then
      self.tail = maybe
      return combined:sub(1, last_dollar - 1)
    end
  end

  self.tail = ""
  return combined
end
-- End of stream: hand back whatever is still held in the tail and reset it.
-- The held text goes through one final rehydrate pass in case it contains a
-- complete placeholder.
function Stream:flush()
  local pending
  pending, self.tail = self.tail, ""
  return self.session:rehydrate(pending)
end
-- Module export: the table carrying the public entry points defined above.
return M