Files
neuron/cli/neuron_recall.py
Tim Lingo 2ea1d50fa3
Neuron Soul CI / build (pull_request) Successful in 5m10s
feat(cli): Claude-as-Neuron CLI tooling + soul-side handoff
Tooling built on Tim's machine to run Neuron from the terminal as a
Claude Code session (identity + graph memory + agency) instead of
relaying to the soul's /api/chat.

- cli/neuron_recall.py    BM25 read over the engram snapshot + CLI memories
                          (works around pinned-only soul search)
- cli/neuron_remember.py  reliable local memory writes with read-back verify
                          (works around the corrupting capture endpoint)
- cli/neuron-chat.py      standalone direct-chat REPL with per-turn memory injection
- cli/neuron_mcp.py       stdlib MCP server (chat/search) with graceful degradation
- cli/CLAUDE.md.example   the operating identity that makes Claude Code run as Neuron
- cli/HANDOFF.md          soul-side bugs to fix so this becomes unnecessary

Scaffolding/proposal - intended to be retired once the soul does native
retrieval, correct persistence, and a real CLI identity/voice surface.
Pairs with the runtime model-passthrough + UTF-8 fixes in the el repo.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-09 20:36:38 -05:00

141 lines
4.2 KiB
Python

#!/usr/bin/env python3
"""
neuron_recall — Neuron's memory read path.
BM25 search over the engram graph snapshot (~3,900 nodes) PLUS Neuron's own
save-as-you-go CLI memories. This is how Neuron (running as Claude Code) recalls
what it knows, since the soul's built-in search is broken.
Usage:
    python3 ~/neuron_recall.py "what do I know about VBD"
    python3 ~/neuron_recall.py "Tim Lingo" 8   # second arg = number of hits
"""
import collections
import glob
import json
import math
import os
import re
import sys
# Engram graph snapshot: a JSON file whose "nodes" list is read by load_docs().
SNAP = os.path.expanduser("~/.neuron/engram/snapshot.json")
# Neuron's own CLI memories, parsed by load_docs() as one JSON object per line.
MEMS = os.path.expanduser("~/.neuron/neuron-cli-memories.jsonl")
def toks(s):
    """Split *s* into lowercase ASCII alphanumeric tokens.

    A falsy *s* (``None`` or ``""``) yields an empty list.
    """
    lowered = s.lower() if s else ""
    return re.findall(r"[a-z0-9]+", lowered)
def sanitize(text):
    """Reduce *text* to printable ASCII with collapsed horizontal whitespace.

    Every character outside the printable ASCII range (other than newline and
    tab) becomes a space; runs of spaces/tabs are then squeezed to a single
    space and the result is stripped. Falsy input returns ``""``.
    """
    if not text:
        return ""
    pieces = []
    for ch in text:
        printable = 32 <= ord(ch) < 127
        pieces.append(ch if printable or ch in "\n\t" else " ")
    collapsed = re.sub(r"[ \t]+", " ", "".join(pieces))
    return collapsed.strip()
# Field names that show up when a node's content is really a serialized
# node-metadata blob (corrupted/nested nodes) rather than readable prose.
_NOISE = (
    "temporal_decay_rate",
    "working_memory_weight",
    "background_activation",
    "suppression_count",
    "activation_count",
)


def is_prose(content):
    """Return True when *content* reads as prose rather than a metadata dump.

    Content is rejected when it mentions two or more of the ``_NOISE`` field
    names, or when its count of JSON-style punctuation pairs exceeds
    ``max(6, len(content) / 80)``.
    """
    marker_hits = len([marker for marker in _NOISE if marker in content])
    if marker_hits >= 2:
        return False
    # Dense JSON punctuation means a data blob, not a sentence.
    json_punct = sum(content.count(seq) for seq in ('":', ',"', '{"'))
    budget = max(6, len(content) / 80)
    return json_punct <= budget
def load_docs():
    """Collect searchable documents from the graph snapshot and the CLI memory log.

    Returns:
        A list of ``(id, label, content, source)`` tuples. ``source`` is
        ``"graph"`` for snapshot nodes and ``"neuron-memory"`` for entries read
        from the JSONL memory file.

    Both inputs are best-effort: a missing, unreadable, or malformed file
    contributes no documents, and a single malformed record is skipped without
    discarding the records around it.
    """
    docs = []  # (id, label, content, source)

    def text_of(value):
        # Only strings can be sanitized; any other JSON type counts as empty.
        return value if isinstance(value, str) else ""

    # --- graph snapshot ---
    try:
        with open(SNAP, encoding="utf-8", errors="replace") as fh:
            snapshot = json.load(fh)
    except (OSError, ValueError, RecursionError):
        # Missing/unreadable file or invalid JSON: rely on CLI memories only.
        snapshot = None
    nodes = snapshot.get("nodes", []) if isinstance(snapshot, dict) else []
    if not isinstance(nodes, list):
        nodes = []
    for n in nodes:
        if not isinstance(n, dict):
            continue
        orig = text_of(n.get("content"))
        c = sanitize(orig)
        # Skip short content, and content that shrank to 60% or less of its
        # original length during sanitizing (i.e. it was mostly non-ASCII).
        if len(c) < 40 or len(c) / max(len(orig), 1) <= 0.6:
            continue
        if not is_prose(c):
            continue
        node_id = sanitize(text_of(n.get("id"))) or "node"
        label = sanitize(text_of(n.get("label")) or text_of(n.get("title")))
        docs.append((node_id, label, c, "graph"))

    # --- Neuron's own CLI memories (file order is irrelevant; BM25 ranks) ---
    try:
        with open(MEMS, encoding="utf-8", errors="replace") as fh:
            for line in fh:
                line = line.strip()
                if not line:
                    continue
                try:
                    m = json.loads(line)
                except (ValueError, RecursionError):
                    continue  # corrupt line: skip it, keep the rest
                if not isinstance(m, dict):
                    continue  # valid JSON but not a memory object
                c = sanitize(text_of(m.get("content", "")))
                if c:
                    docs.append((m.get("id", "mem"), m.get("tier", "note"),
                                 c, "neuron-memory"))
    except OSError:
        # No memory file yet (or it is unreadable): keep what was loaded.
        pass
    return docs
def bm25(docs, query, k, k1=1.5, b=0.75):
    """Rank *docs* against *query* with BM25 and return the best *k*.

    Args:
        docs: sequence of tuples whose element at index 2 is the text to score.
        query: free-text query; tokenized with ``toks``.
        k: maximum number of documents to return. Values below 1 return ``[]``.
        k1: term-frequency saturation parameter.
        b: document-length normalization strength.

    Returns:
        Up to *k* of the original doc tuples, best match first. Documents with
        no matching query term are never returned, and a document whose first
        120 content characters equal those of an already-selected hit is
        dropped as a duplicate.
    """
    if k < 1 or not docs:
        return []
    qt = toks(query)
    if not qt:
        return []

    tokd = [toks(d[2]) for d in docs]
    n_docs = len(docs)

    # Document frequency is only needed for the terms the query mentions.
    wanted = set(qt)
    df = collections.Counter()
    for t in tokd:
        df.update(wanted.intersection(t))
    idf = {w: math.log(1 + (n_docs - f + 0.5) / (f + 0.5)) for w, f in df.items()}
    avgdl = sum(len(t) for t in tokd) / n_docs

    scored = []
    for i, t in enumerate(tokd):
        tf = collections.Counter(t)
        dl = len(t)
        s = 0.0
        for w in qt:
            f = tf.get(w, 0)
            if f:
                # f > 0 implies dl > 0, hence avgdl > 0: the division is safe.
                s += idf[w] * (f * (k1 + 1)) / (f + k1 * (1 - b + b * dl / avgdl))
        if s > 0:
            scored.append((s, i))
    # Highest score first; ties resolve to the later document index.
    scored.sort(reverse=True)

    out, seen = [], set()
    for _, i in scored:
        sig = docs[i][2][:120]
        if sig in seen:
            continue
        seen.add(sig)
        out.append(docs[i])
        if len(out) >= k:
            break
    return out
def main():
    """CLI entry point: ``neuron_recall.py "<query>" [n]``.

    Prints up to *n* (default 6) matching memories to stdout. With no
    arguments, prints the usage line and returns. A non-integer or
    non-positive *n* exits with an error message and a non-zero status
    instead of a traceback.
    """
    if len(sys.argv) < 2:
        print("usage: neuron_recall.py \"<query>\" [n]")
        return
    query = sys.argv[1]
    k = 6
    if len(sys.argv) > 2:
        try:
            k = int(sys.argv[2])
        except ValueError:
            k = 0  # fall through to the shared error path below
        if k < 1:
            # sys.exit with a string prints it to stderr and exits with status 1.
            sys.exit(f"neuron_recall: n must be a positive integer, got {sys.argv[2]!r}")
    docs = load_docs()
    hits = bm25(docs, query, k)
    if not hits:
        print(f"(no memories matched '{query}')")
        return
    print(f"# {len(hits)} memories for: {query}\n")
    for _id, label, content, source in hits:
        # Graph hits carry a "·" prefix; Neuron's own memories carry none.
        tag = "" if source == "neuron-memory" else "·"
        head = f" [{label}]" if label else ""
        print(f"{tag}{head}\n{content[:700].strip()}\n")


if __name__ == "__main__":
    main()