Files
aish/history.lua
marfrit e525063df3 history: trust file helpers for Phase 9 (commit #1)
Foundation for the project-overlay trust mechanism. No callers yet —
commit #2 wires main.lua to use these.

Three new functions:

  history._sha256_file(path) -> hex digest or nil
    Shells `sha256sum`; parses first whitespace-separated field;
    validates 64-hex-char length. nil on any failure (path missing,
    binary missing, file unreadable). Caller treats nil as "skip
    the trust path" — never crashes.

  history.is_trusted(trust_path, project_path, sha256) -> bool
    Reads trust_path as JSONL; returns true iff an entry exists
    matching BOTH project_path AND sha256. Missing / corrupt /
    unreadable trust file -> false (re-prompt). Per-line JSON
    decode means partial-write corruption affects at most one line.

  history.add_trusted(trust_path, project_path, sha256) -> bool
    mkdir -p parent; append JSONL line {path, sha256, ts (ISO)};
    chmod 600 the trust file (best-effort; ignore failure). Single
    writer per call; append-only.

11 unit cases verified:
  - sha256 known value matches manual `sha256sum`
  - nil / missing-file -> nil (no crash)
  - is_trusted on missing trust file -> false
  - add_trusted + is_trusted roundtrip works
  - Different sha -> not trusted (content-binding)
  - Different path -> not trusted
  - Multi-entry trust file: each entry independently checked
  - chmod 600 verified via stat

Regression: test_safety 87/87, test_router_model 31/31.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-16 23:45:07 +00:00

371 lines
13 KiB
Lua

-- history.lua — persistent session log + cross-session memory store.
-- Phase 1: append-only JSONL per session under <config.history.dir>/sessions/.
-- Phase 4: cross-session memory.jsonl at <config.history.dir>/memory.jsonl,
-- single-writer enforced via flock(LOCK_EX | LOCK_NB) per PHASE4 R-B1.
-- See docs/PHASE0.md §11, docs/PHASE1.md §6, docs/PHASE4.md §4.
local json = require("dkjson")
local libc = require("ffi.libc")
local ffi = require("ffi")
-- Module table; returned at the end of the file and populated with the
-- public functions (M.open, M.load, ...).
local M = {}
-- Method table for session handles. M.open installs it as the metatable of
-- every handle it returns, so handle:append / handle:close resolve here.
local Session = {}
Session.__index = Session
-- Method table for memory-store handles. M.open_memory installs it as the
-- metatable of every handle it returns.
local Memory = {}
Memory.__index = Memory
-- Best-effort mkdir -p. Failures are surfaced by io.open below. Uses
-- single-quote escaping (Lua's %q double-quotes, which still expands $(...)
-- and $VAR inside) so a path containing shell metacharacters doesn't trip.
-- Quote `s` as a single /bin/sh word: wrap it in single quotes and rewrite
-- every embedded ' as '\'' (close quote, escaped quote, reopen quote).
local function sh_singlequote(s)
  -- gsub returns (string, count); binding to one local keeps only the string.
  local escaped = s:gsub("'", "'\\''")
  return "'" .. escaped .. "'"
end
-- Create `path` and any missing parents via `mkdir -p`. A nil or empty
-- path is a no-op. The mkdir exit status is not inspected.
local function ensure_dir(path)
  if path and path ~= "" then
    os.execute("mkdir -p " .. sh_singlequote(path))
  end
end
-- Return everything before the final "/component" of `path`.
-- Yields "" for a file directly under "/", and nil when `path` has no
-- slash or ends with one.
local function parent_dir(path)
  local dir = path:match("^(.*)/[^/]+$")
  return dir
end
-- Open the session log at `path` in append mode, creating parent
-- directories first.
--   path : path to the .jsonl session file
--   meta : optional table; written as a {meta = ...} first line only when
--          the file does not exist yet or its first line is empty
-- Returns the session handle, or (nil, errmsg) when the file cannot be
-- opened for append.
function M.open(path, meta)
  ensure_dir(parent_dir(path))

  -- Probe for existing content with a separate read-only handle before
  -- opening for append; only the first line is inspected.
  local has_content = false
  local probe = io.open(path, "r")
  if probe then
    local head = probe:read("*l")
    has_content = head ~= nil and #head > 0
    probe:close()
  end

  local fh, err = io.open(path, "a")
  if not fh then
    return nil, err
  end

  local sess = setmetatable({ path = path, fh = fh, closed = false }, Session)
  if meta and not has_content then
    sess:append({ meta = meta })
  end
  return sess
end
-- Append one turn to the session log as a single JSON line.
--   turn : table to encode with json.encode
-- Returns true on success, or (false, errmsg) when the session is closed
-- or the underlying write/flush fails (e.g. disk full), so callers that
-- check the result no longer see a false "success".
function Session:append(turn)
  if self.closed then return false, "session closed" end
  local line = json.encode(turn)
  -- write + flush so a crash mid-session preserves all turns up to the
  -- last full append. No fsync per line (would dominate runtime on slow
  -- disks).
  local wrote, werr = self.fh:write(line, "\n")
  if not wrote then return false, werr end
  local flushed, ferr = self.fh:flush()
  if not flushed then return false, ferr end
  return true
end
-- Close the underlying file handle and mark the session closed.
-- Calling it again on an already-closed session does nothing.
function Session:close()
  if not self.closed then
    self.fh:close()
    self.fh, self.closed = nil, true
  end
end
-- Load a session file. Returns:
--   turns, meta : turns is ALWAYS a table on success (possibly empty);
--                 meta is the value of the {meta = {...}} header line, or
--                 nil if the first non-empty line is not such a header
--   nil, err    : on file open failure (turns-first means callers can
--                 test `if not turns then` without ambiguity vs a missing
--                 meta-header line)
-- Lines that fail to decode, or that decode to something other than a JSON
-- object (a bare number, boolean or string), are skipped rather than
-- raising, so a partially written or damaged line never aborts the load.
function M.load(path)
  local fh, err = io.open(path, "r")
  if not fh then return nil, err end
  local meta, turns = nil, {}
  local first = true
  for line in fh:lines() do
    if #line > 0 then
      local obj = json.decode(line)
      -- Only tables can be indexed safely; indexing a decoded number or
      -- boolean would raise.
      if type(obj) == "table" then
        if first and obj.meta then
          meta = obj.meta
        elseif obj.role and obj.content then
          turns[#turns + 1] = obj
        end
      end
      -- Any non-empty line, valid or not, uses up the header slot.
      first = false
    end
  end
  fh:close()
  return turns, meta
end
-- Enumerate session logs in `dir`.
-- Runs `ls -1` on the (single-quoted) directory and keeps the entries whose
-- name ends in ".jsonl". Returns an array of basenames (no path prefix),
-- sorted ascending; the array is empty when `dir` is nil/empty, when the
-- pipe cannot be opened, or when ls prints nothing (its stderr is
-- discarded).
function M.list_sessions(dir)
  local names = {}
  if dir and dir ~= "" then
    local listing = io.popen("ls -1 " .. sh_singlequote(dir) .. " 2>/dev/null")
    if listing then
      for entry in listing:lines() do
        if entry:match("%.jsonl$") then
          names[#names + 1] = entry
        end
      end
      listing:close()
      -- Plain lexicographic order; chronological when names are ISO 8601.
      table.sort(names)
    end
  end
  return names
end
-- ============================================================================
-- Phase 4: memory.jsonl — cross-session memory store.
-- Same JSONL convention as session logs, but a single shared file rather
-- than per-session. Single-writer enforced via flock advisory lock.
-- See docs/PHASE4.md §2 / §4.
-- ============================================================================
-- We need an integer fd for flock. io.open returns a Lua FILE*; LuaJIT
-- has no portable way to extract the underlying fd from that. Workaround:
-- open the file through libc's open(2) and keep the raw descriptor.
-- Only open() and lseek() are declared here; close/read/write/flock are
-- reached through the `libc` module required at the top of the file.
-- NOTE(review): open(2) is variadic in C but is declared here with a fixed
-- `int mode` third parameter — confirm this matches the calling convention
-- on every supported target.
ffi.cdef[[
int open(const char *pathname, int flags, int mode);
long lseek(int fd, long offset, int whence);
]]
-- open(2) flag values and lseek whence, hard-coded as plain integers.
-- NOTE(review): these are the Linux/glibc values (see the per-line notes);
-- other platforms use different numbers — confirm the target set.
local O_RDWR = 2
local O_CREAT = 64 -- 0100 octal on Linux/glibc
local O_APPEND = 1024 -- 02000 octal on Linux/glibc
local SEEK_SET = 0
-- Permission bits passed as the `mode` argument of open().
local FILE_MODE = 0x180 -- 0600 octal — owner rw only
-- ---------------------------------------------------------------- M.open_memory
-- Internal: read the whole file behind `fd` from offset 0 and return
-- (max_id, was_empty). max_id is the largest numeric `id` found on any
-- decodable JSON-object line (0 if none); was_empty is true when no bytes
-- could be read at all.
local function scan_max_id(fd)
  local max_id = 0
  local was_empty = true
  -- Bytes seen after the most recent "\n". A record may straddle two 4K
  -- reads, so the unfinished fragment is carried into the next chunk
  -- instead of being decoded (and discarded) as two broken halves.
  local tail = ""

  local function consider(line)
    if #line == 0 then return end
    local obj = json.decode(line)
    -- Ignore scalars and non-numeric ids: indexing or comparing them
    -- would raise.
    if type(obj) == "table" and type(obj.id) == "number"
        and obj.id > max_id then
      max_id = obj.id
    end
  end

  ffi.C.lseek(fd, 0, SEEK_SET)
  while true do
    -- libc.read returns the chunk as a string plus its length.
    local chunk, n = libc.read(fd, 4096)
    if not chunk or n == 0 then break end
    was_empty = false
    local buf = tail .. chunk
    local pos = 1
    while true do
      local nl = buf:find("\n", pos, true)
      if not nl then break end
      consider(buf:sub(pos, nl - 1))
      pos = nl + 1
    end
    tail = buf:sub(pos)
  end
  -- A final line without a trailing newline still counts.
  consider(tail)
  return max_id, was_empty
end

-- Opens memory.jsonl at `path` for append, takes an exclusive non-blocking
-- flock on the fd, scans existing content for the max id, and writes a meta
-- header if the file had no content. Returns:
--   handle      on success (next_id is max existing id + 1)
--   nil, err    on lock-held / open failure
function M.open_memory(path)
  ensure_dir(parent_dir(path))
  -- Open via libc open(2) so we have an integer fd for flock. The bit lib
  -- may not be loaded; fall back to numeric add (the flags don't overlap,
  -- so OR == add here).
  local flags = bit and bit.bor(O_RDWR, O_CREAT, O_APPEND)
    or (O_RDWR + O_CREAT + O_APPEND)
  local fd = ffi.C.open(path, flags, FILE_MODE)
  if fd < 0 then
    return nil, "open " .. path .. " failed: "
      .. libc.strerror(libc.errno())
  end
  local ok, err = libc.flock(fd, libc.LOCK_EX + libc.LOCK_NB)
  if not ok then
    libc.close(fd)
    return nil, "memory.jsonl held by another aish process ("
      .. tostring(err) .. ")"
  end
  local max_id, was_empty = scan_max_id(fd)
  -- Seek to end so subsequent libc.write appends.
  ffi.C.lseek(fd, 0, 2) -- SEEK_END
  local handle = setmetatable({
    path = path,
    fd = fd,
    next_id = max_id + 1,
    closed = false,
  }, Memory)
  if was_empty then
    -- Write meta header. No id; load_memory skips lines without id.
    handle:_write_raw({
      meta = {
        aish_version = "phase4",
        created = os.date("!%Y-%m-%dT%H:%M:%SZ"),
      }
    })
  end
  return handle
end
-- Internal: serialise `obj` with json.encode and hand it, newline-terminated,
-- to libc.write on this handle's fd. The write result is not inspected.
function Memory:_write_raw(obj)
  local encoded = json.encode(obj)
  libc.write(self.fd, encoded .. "\n")
end
-- Record a new memory item and return the id assigned to it.
--   kind    : "fact", "pref" or "context" (anything else raises)
--   content : non-empty value (nil / "" raises)
--   tags    : optional; stored as `tags` only when truthy
--   source  : optional; stored as `source` only when truthy
-- Raises if the handle is already closed.
function Memory:add(kind, content, tags, source)
  assert(not self.closed, "memory:add on closed handle")
  local kind_ok = (kind == "fact") or (kind == "pref") or (kind == "context")
  assert(kind_ok,
    "memory:add: kind must be fact|pref|context (got " .. tostring(kind) .. ")")
  assert(content and content ~= "", "memory:add: content required")

  -- Claim the id before writing.
  local id = self.next_id
  self.next_id = id + 1

  self:_write_raw({
    id = id,
    ts = os.date("!%Y-%m-%dT%H:%M:%SZ"),
    kind = kind,
    content = content,
    -- `x or nil` leaves the field absent for nil/false values.
    tags = tags or nil,
    source = source or nil,
  })
  return id
end
-- Write a tombstone record (kind = "forget") pointing at `target_id`.
-- The tombstone consumes the next id itself. Nothing checks whether the
-- target exists or is still active; a caller wanting to report "not active"
-- should consult M.load_memory first. Raises if the handle is closed.
function Memory:forget(target_id)
  assert(not self.closed, "memory:forget on closed handle")
  local tombstone_id = self.next_id
  self:_write_raw({
    id = tombstone_id,
    ts = os.date("!%Y-%m-%dT%H:%M:%SZ"),
    kind = "forget",
    target = target_id,
  })
  -- Advance only after the write call has returned.
  self.next_id = tombstone_id + 1
end
-- Close the memory handle's fd (which also drops the flock held on it) and
-- mark the handle closed. A second call is a no-op.
function Memory:close()
  if not self.closed then
    libc.close(self.fd)
    self.fd, self.closed = nil, true
  end
end
-- ---------------------------------------------------------------- M.load_memory
-- Read all items, resolve tombstones, return the active set, newest first.
-- Lines that do not decode to a JSON object, and objects without an `id`
-- field (e.g. the meta header), are silently dropped. Tombstones with
-- non-matching targets are no-ops.
-- Ordering: `ts` descending (ISO 8601 strings sort chronologically); items
-- sharing a timestamp are ordered by numeric id descending so the result is
-- deterministic; items with a missing / non-string `ts` sort last.
-- Returns:
--   items_table   array of {id, ts, kind, content, tags?, source?}
--                 may be empty if file doesn't exist or contains only
--                 meta/tombstones
function M.load_memory(path)
  local fh = io.open(path, "r")
  if not fh then return {} end
  local items = {}   -- by id
  local forget = {}  -- set of target ids
  for line in fh:lines() do
    if #line > 0 then
      local obj = json.decode(line)
      -- A bare JSON number/boolean cannot be indexed; skip it like any
      -- other malformed line.
      if type(obj) == "table" and obj.id then
        local kind = obj.kind
        if kind == "forget" then
          if obj.target then forget[obj.target] = true end
        elseif kind == "fact" or kind == "pref" or kind == "context" then
          items[obj.id] = obj
        end
      end
    end
  end
  fh:close()

  local active = {}
  for id, item in pairs(items) do
    if not forget[id] then active[#active + 1] = item end
  end

  -- Normalise the sort key so the comparator never compares nil or mixed
  -- types (which would raise) and stays a strict ordering.
  local function sort_key(item)
    local ts = type(item.ts) == "string" and item.ts or ""
    local id = type(item.id) == "number" and item.id or -math.huge
    return ts, id
  end
  table.sort(active, function(a, b)
    local a_ts, a_id = sort_key(a)
    local b_ts, b_id = sort_key(b)
    if a_ts ~= b_ts then return a_ts > b_ts end
    -- Timestamps have one-second resolution, so ties are common; break
    -- them by id instead of leaving the order to pairs().
    return a_id > b_id
  end)
  return active
end
-- ---------------------------------------------------------------- Phase 9 trust file
-- ~/.aish/trusted-projects (JSONL, mode 0600). One entry per accepted
-- project .aish.lua. Schema: {path = "<abs>", sha256 = "<hex>",
-- ts = "<iso>"}. sha256 binds bytes; content change re-prompts.

-- Internal helper: shell out to `sha256sum` and return the 64-character hex
-- digest of the file at `path`, or nil on any failure (non-string / empty
-- path, binary missing, file unreadable, unexpected output). Callers treat
-- nil as "skip the trust path" rather than crashing.
function M._sha256_file(path)
  if type(path) ~= "string" or path == "" then return nil end
  -- `--` stops option parsing so a path beginning with "-" is hashed, not
  -- interpreted as a flag; quoting goes through the shared helper.
  local pipe = io.popen("sha256sum -- " .. sh_singlequote(path)
    .. " 2>/dev/null")
  if not pipe then return nil end
  local line = pipe:read("*l")
  pipe:close()
  if not line then return nil end
  -- First field is the digest. GNU sha256sum prefixes the line with a
  -- backslash when the file name contains a backslash or newline; the
  -- digest itself is unchanged, so tolerate that marker.
  local digest = line:match("^\\?(%x+)")
  if digest and #digest == 64 then return digest end
  return nil
end
-- Returns true iff a JSONL entry exists at trust_path matching BOTH
-- project_path AND sha256. A missing or unreadable file, and any line that
-- fails to decode or decodes to something other than a JSON object, is
-- treated as "not trusted" — a damaged line never raises.
function M.is_trusted(trust_path, project_path, sha256)
  if not (trust_path and project_path and sha256) then return false end
  local fh = io.open(trust_path, "r")
  if not fh then return false end
  local trusted = false
  for line in fh:lines() do
    if #line > 0 then
      local entry = json.decode(line)
      -- Guard the type first: indexing a decoded number/boolean would
      -- raise instead of falling through to "not trusted".
      if type(entry) == "table"
          and entry.path == project_path
          and entry.sha256 == sha256 then
        trusted = true
        break
      end
    end
  end
  fh:close()
  return trusted
end
-- Appends a trust record {path, sha256, ts} to the JSONL file at
-- trust_path. Creates the parent directory (mkdir -p) and applies a
-- best-effort chmod 0600 on every call, before the record is written.
-- Append-only; a partial write corrupts at most one line, which readers
-- skip.
-- Returns true only when the record was written and the file closed
-- without error; false on missing arguments, open failure, or write/close
-- failure (e.g. disk full).
function M.add_trusted(trust_path, project_path, sha256)
  if not (trust_path and project_path and sha256) then return false end
  -- Encode first so an encoding error cannot leak an open handle.
  local ts = os.date("!%Y-%m-%dT%H:%M:%SZ")
  local record = json.encode({ path = project_path, sha256 = sha256, ts = ts })
    .. "\n"
  ensure_dir(parent_dir(trust_path))
  local fh = io.open(trust_path, "a")
  if not fh then return false end
  -- Tighten permissions before any content lands in the file. Best-effort:
  -- a chmod failure is ignored and does not affect the return value.
  os.execute("chmod 600 -- " .. sh_singlequote(trust_path) .. " 2>/dev/null")
  local wrote = fh:write(record)
  -- Buffered write errors may only surface at close, so check both.
  local closed = fh:close()
  return not not (wrote and closed)
end
-- Export the module table to require() callers.
return M