diff --git a/config.lua b/config.lua
index 58bc39a..621ddc6 100644
--- a/config.lua
+++ b/config.lua
@@ -285,6 +285,28 @@ return {
     --     probe_grammar      = [[root ::= ("YES" | "NO")]],
     -- },
 
+    -- ── Issue #87 (route-aware context compression).
+    -- When a routed model preset has `local_compress = true`, each
+    -- broker call against THAT preset gets a compressed view of
+    -- ctx.turns: only the last `keep_turns` turns, and any turn whose
+    -- content exceeds `max_turn_chars` bytes keeps only its tail (the
+    -- head is cut). The full context lives on (visible via :history);
+    -- compression is purely per-request, for small models that
+    -- effectively use a fraction of their advertised context window.
+    --
+    -- Set the per-model opt-in on models[<name>]:
+    --     models.fast = { ..., local_compress = true }
+    -- Defaults live under context.compress:
+    --     context = {
+    --         ...
+    --         compress = { keep_turns = 2, max_turn_chars = 800 },
+    --     }
+    --
+    -- Trade-off (documented in the feature request): a truncated tool
+    -- turn loses its head. Acceptable for shell-output blocks (the
+    -- tail is usually the relevant bit); a known limitation for
+    -- structured tool results. Disable per-model if it bites.
+
     -- ── Phase 5 context summarization on sliding-window eviction.
     -- Set INSIDE the context = { ... } block above to enable:
     --     context = {
diff --git a/context.lua b/context.lua
index 6acbe36..895dff7 100644
--- a/context.lua
+++ b/context.lua
@@ -228,6 +228,40 @@ The user will be prompted to confirm destructive actions; expect their
 verdict in the next turn as a synthesized "[aish] ... skipped by user"
 message if they declined.]]
 
+-- #87: route-aware context compression. Builds a per-request view: the
+-- LAST keep_turns turns (default 2), with content over max_chars bytes
+-- cut to its tail (limit <= 0 gives ""), starting on a UTF-8 boundary.
+-- If the window opens on a tool turn it is widened back to the anchoring
+-- assistant-with-tool_calls turn; tool turns with no such anchor are
+-- dropped. Returns a new list; `turns` and its elements stay unmutated.
+local function _compress_turns(turns, keep_turns, max_chars)
+    local n = #turns
+    local start = math.max(1, n - (keep_turns or 2) + 1)
+    if start <= n and turns[start].role == "tool" then
+        local a = start - 1
+        while a >= 1 and turns[a].role == "tool" do a = a - 1 end
+        local prev = turns[a]
+        if prev and prev.role == "assistant" and prev.tool_calls then
+            start = a  -- keep the tool results the model asked for
+        end
+    end
+    while start <= n and turns[start].role == "tool" do start = start + 1 end
+    local out = {}
+    for i = start, n do
+        local t = turns[i]
+        local c = t.content or ""
+        if max_chars and #c > max_chars then
+            local tail = max_chars > 0 and c:sub(-max_chars) or ""
+            tail = tail:gsub("^[\128-\191]+", "")  -- skip split UTF-8 bytes
+            out[#out + 1] = { role = t.role, content = tail,
+                tool_calls = t.tool_calls, tool_call_id = t.tool_call_id }
+        else
+            out[#out + 1] = t  -- untouched turn: share the reference
+        end
+    end
+    return out
+end
+
 function Context:to_messages(opts)
     -- Phase 10 (#86): per-call system_prompt_override. Replaces the
     -- BASE system_prompt for THIS render only (state unchanged); the
@@ -254,8 +288,19 @@ function Context:to_messages(opts)
     end
     local msgs = { { role = "system", content = sys_content } }
 
+    -- #87: route-aware compression. When opts.compress is set, swap
+    -- the turn iterable for a truncated copy. self.turns unchanged
+    -- (this is a per-render transformation; persistence + display
+    -- via :history see the full context).
+    local turns = self.turns
+    if opts and opts.compress then
+        turns = _compress_turns(self.turns,
+            opts.compress.keep_turns or 2,
+            opts.compress.max_turn_chars or 800)
+    end
+
     if self.use_tool_role then
-        for _, t in ipairs(self.turns) do
+        for _, t in ipairs(turns) do
             local m = { role = t.role, content = t.content }
             if t.role == "assistant" and t.tool_calls then
                 -- OpenAI shape wraps each call as
@@ -292,9 +337,10 @@ function Context:to_messages(opts)
         end
     end
 
+    -- #87: same compressed `turns` view used by the fallback path.
     local i = 1
-    while i <= #self.turns do
-        local t = self.turns[i]
+    while i <= #turns do
+        local t = turns[i]
         if t.role == "assistant" and t.tool_calls then
             local parts = {}
             if t.content and t.content ~= "" then
@@ -302,7 +348,7 @@ function Context:to_messages(opts)
             end
             for ci, call in ipairs(t.tool_calls) do
                 local result_text = ""
-                local next_t = self.turns[i + ci]
+                local next_t = turns[i + ci]
                 if next_t and next_t.role == "tool"
                           and next_t.tool_call_id == call.id then
                     result_text = next_t.content
diff --git a/repl.lua b/repl.lua
index fdbdd1a..657919f 100644
--- a/repl.lua
+++ b/repl.lua
@@ -1007,6 +1007,20 @@ function M.run(config)
                              and config.routing.grammars
                              and req_class
                              and config.routing.grammars[req_class]
+        -- #87: route-aware context compression. When the routed model
+        -- preset has `local_compress = true`, ctx:to_messages keeps only
+        -- the last N turns and only the tail of oversized content for
+        -- THIS request. Cloud routes (req_cfg.local_compress nil/false)
+        -- get the full context unchanged. Defaults: config.context.compress;
+        -- per-model opt-in keeps the design surface predictable.
+        local compress_opts
+        if req_cfg and req_cfg.local_compress then
+            local cc = (config.context and config.context.compress) or {}
+            compress_opts = {
+                keep_turns     = cc.keep_turns     or 2,
+                max_turn_chars = cc.max_turn_chars or 800,
+            }
+        end
 
         local depth = 0
         local final_resp = ""
@@ -1017,7 +1031,10 @@ function M.run(config)
             local tool_calls_seen = {}
             local redact_mode     = secrets_mode_for(req_cfg)
             local scrubbed_msgs   = scrub_messages(
-                ctx:to_messages({ system_prompt_override = sys_override }),
+                ctx:to_messages({
+                    system_prompt_override = sys_override,
+                    compress               = compress_opts,
+                }),
                 redact_mode)
             -- Streaming rehydrator wraps the on_delta so the user sees real
             -- values; text_parts accumulates the REHYDRATED chunks so