2ea1d50fa3
Neuron Soul CI / build (pull_request) Successful in 5m10s
Tooling built on Tim's machine to run Neuron from the terminal as a
Claude Code session (identity + graph memory + agency) instead of
relaying to the soul's /api/chat.
- cli/neuron_recall.py BM25 read over the engram snapshot + CLI memories
(works around pinned-only soul search)
- cli/neuron_remember.py reliable local memory writes with read-back verify
(works around the corrupting capture endpoint)
- cli/neuron-chat.py standalone direct-chat REPL with per-turn memory injection
- cli/neuron_mcp.py stdlib MCP server (chat/search) with graceful degradation
- cli/CLAUDE.md.example the operating identity that makes Claude Code run as Neuron
- cli/HANDOFF.md soul-side bugs to fix so this becomes unnecessary
Scaffolding/proposal - intended to be retired once the soul does native
retrieval, correct persistence, and a real CLI identity/voice surface.
Pairs with the runtime model-passthrough + UTF-8 fixes in the el repo.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
141 lines
4.2 KiB
Python
141 lines
4.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
neuron_recall — Neuron's memory read path.
|
|
|
|
BM25 search over the engram graph snapshot (~3,900 nodes) PLUS Neuron's own
|
|
save-as-you-go CLI memories. This is how Neuron (running as Claude Code) recalls
|
|
what it knows, since the soul's built-in search is broken.
|
|
|
|
Usage:
|
|
python3 ~/neuron_recall.py "what do I know about VBD"
|
|
python3 ~/neuron_recall.py "Tim Lingo" 8 # second arg = number of hits (default 6)
|
|
"""
|
|
import collections
|
|
import glob
|
|
import json
|
|
import math
|
|
import os
|
|
import re
|
|
import sys
|
|
|
|
# Engram graph snapshot: a JSON document whose "nodes" list load_docs() indexes.
SNAP = os.path.expanduser("~/.neuron/engram/snapshot.json")
# Neuron's own CLI memories, read by load_docs() as one JSON object per line.
MEMS = os.path.expanduser("~/.neuron/neuron-cli-memories.jsonl")
|
|
|
|
|
|
def toks(s):
    """Split *s* into lowercase ASCII alphanumeric tokens.

    Any falsy input (``None``, ``""``) yields an empty list. Every character
    outside ``a-z`` / ``0-9`` (after lowercasing) acts as a separator.
    """
    lowered = s.lower() if s else ""
    return re.findall(r"[a-z0-9]+", lowered)
|
|
|
|
|
|
def sanitize(text):
    """Return *text* reduced to printable ASCII with collapsed spacing.

    Characters other than printable ASCII (code points 32-126), newline and
    tab are replaced by a space; runs of spaces/tabs are then squeezed to a
    single space and the result is stripped. Falsy input yields ``""``.
    """
    if not text:
        return ""
    pieces = []
    for ch in text:
        keep = ch == "\n" or ch == "\t" or 32 <= ord(ch) < 127
        pieces.append(ch if keep else " ")
    squeezed = re.sub(r"[ \t]+", " ", "".join(pieces))
    return squeezed.strip()
|
|
|
|
|
|
# Field names that show up when a node's content is really a serialized
# node-metadata blob (corrupted/nested node) instead of readable prose.
_NOISE = (
    "temporal_decay_rate",
    "working_memory_weight",
    "background_activation",
    "suppression_count",
    "activation_count",
)


def is_prose(content):
    """Tell readable memory text apart from serialized graph metadata.

    Returns ``False`` when at least two of the ``_NOISE`` markers occur in
    *content*, or when the combined count of the JSON-ish sequences ``":``,
    ``,"`` and ``{"`` exceeds ``max(6, len(content) / 80)``; ``True`` otherwise.
    """
    marker_hits = len([marker for marker in _NOISE if marker in content])
    if marker_hits >= 2:
        return False
    # A high density of JSON punctuation means a data blob, not prose.
    json_punct = sum(content.count(seq) for seq in ('":', ',"', '{"'))
    allowed = max(6, len(content) / 80)
    return json_punct <= allowed
|
|
|
|
|
|
def load_docs():
    """Collect searchable documents from the graph snapshot and the CLI memory log.

    Returns:
        A list of ``(id, label, content, source)`` tuples. ``source`` is
        ``"graph"`` for nodes read from ``SNAP`` and ``"neuron-memory"`` for
        entries read from ``MEMS``. Loading is best-effort: a missing file
        contributes nothing, an unreadable/corrupt file is reported on stderr
        and skipped, and a malformed individual record is skipped without
        discarding the records that follow it.
    """

    def _text(value):
        # Coerce scalar ids/labels to text so sanitize() never sees a non-string.
        if isinstance(value, str):
            return value
        return "" if value is None else str(value)

    docs = []  # (id, label, content, source)

    # graph snapshot
    snapshot = None
    try:
        with open(SNAP, encoding="utf-8", errors="replace") as fh:
            snapshot = json.load(fh)
    except FileNotFoundError:
        pass  # no snapshot yet: recall falls back to CLI memories only
    except (OSError, ValueError) as err:
        print(f"neuron_recall: ignoring unreadable snapshot {SNAP}: {err}",
              file=sys.stderr)

    nodes = snapshot.get("nodes") if isinstance(snapshot, dict) else None
    for n in nodes if isinstance(nodes, list) else ():
        if not isinstance(n, dict):
            continue
        orig = n.get("content") or ""
        if not isinstance(orig, str):
            continue
        c = sanitize(orig)
        # Skip short nodes, and nodes where sanitizing kept 60% or less of the
        # original length (i.e. the content was mostly non-ASCII/control bytes).
        if len(c) < 40 or len(c) / max(len(orig), 1) <= 0.6:
            continue
        if not is_prose(c):
            continue
        docs.append((sanitize(_text(n.get("id"))) or "node",
                     sanitize(_text(n.get("label") or n.get("title"))),
                     c, "graph"))

    # Neuron's own CLI memories (most recent first matters less; BM25 ranks)
    try:
        with open(MEMS, encoding="utf-8", errors="replace") as fh:
            for raw in fh:
                raw = raw.strip()
                if not raw:
                    continue
                try:
                    m = json.loads(raw)
                except ValueError:
                    continue  # tolerate a torn/partial line in the log
                if not isinstance(m, dict):
                    continue
                content = m.get("content", "")
                if not isinstance(content, str):
                    continue
                c = sanitize(content)
                if c:
                    docs.append((m.get("id", "mem"), m.get("tier", "note"),
                                 c, "neuron-memory"))
    except FileNotFoundError:
        pass  # nothing has been remembered from the CLI yet
    except OSError as err:
        print(f"neuron_recall: ignoring unreadable memories {MEMS}: {err}",
              file=sys.stderr)
    return docs
|
|
|
|
|
|
def bm25(docs, query, k, *, k1=1.5, b=0.75):
    """Rank *docs* against *query* with BM25 and return the best distinct hits.

    Args:
        docs: Sequence of ``(id, label, content, source)`` tuples; only the
            content at index 2 is tokenized and scored.
        query: Free-text query; tokenized with ``toks``.
        k: Maximum number of documents to return. Values ``<= 0`` return ``[]``.
        k1: Term-frequency saturation constant (keyword-only, default 1.5).
        b: Length-normalization strength (keyword-only, default 0.75).

    Returns:
        Up to *k* document tuples, best score first. Documents that match no
        query term are never returned, and a document whose first 120 content
        characters equal those of a better-ranked hit is skipped.
    """
    if k <= 0 or not docs:
        return []
    qt = toks(query)
    if not qt:
        return []

    # One term-frequency table per document; its total is the document length.
    term_counts = [collections.Counter(toks(d[2])) for d in docs]
    lengths = [sum(tf.values()) for tf in term_counts]
    N = len(docs)
    avgdl = sum(lengths) / N

    # Document frequency is only needed for the terms the query actually uses.
    query_terms = set(qt)
    df = collections.Counter()
    for tf in term_counts:
        for w in query_terms:
            if w in tf:
                df[w] += 1
    idf = {w: math.log(1 + (N - f + 0.5) / (f + 0.5)) for w, f in df.items()}

    scored = []
    for i, tf in enumerate(term_counts):
        dl = lengths[i]
        s = 0.0
        for w in qt:  # repeated query terms contribute once per occurrence
            f = tf.get(w, 0)
            if f:
                # f > 0 implies this document is non-empty, so avgdl > 0 here.
                s += idf.get(w, 0) * (f * (k1 + 1)) / (f + k1 * (1 - b + b * dl / avgdl))
        if s > 0:
            scored.append((s, i))
    scored.sort(reverse=True)

    out, seen = [], set()
    for _, i in scored:
        sig = docs[i][2][:120]  # near-duplicate key: leading content characters
        if sig in seen:
            continue
        seen.add(sig)
        out.append(docs[i])
        if len(out) >= k:
            break
    return out
|
|
|
|
|
|
def main():
    """Command-line entry point: print the memories that best match a query.

    ``sys.argv[1]`` is the query and the optional ``sys.argv[2]`` is the
    maximum number of hits (default 6). With no query the usage line is
    printed and the function returns. A hit count that is not an integer is
    reported on stderr and the process exits with status 2.
    """
    usage = "usage: neuron_recall.py \"<query>\" [n]"
    if len(sys.argv) < 2:
        print(usage)
        return
    query = sys.argv[1]
    try:
        k = int(sys.argv[2]) if len(sys.argv) > 2 else 6
    except ValueError:
        print(f"neuron_recall: hit count must be an integer, got {sys.argv[2]!r}",
              file=sys.stderr)
        print(usage, file=sys.stderr)
        sys.exit(2)
    docs = load_docs()
    hits = bm25(docs, query, k)
    if not hits:
        print(f"(no memories matched '{query}')")
        return
    print(f"# {len(hits)} memories for: {query}\n")
    for _id, label, content, source in hits:
        # Star marks Neuron's own CLI memories; dot marks graph-snapshot nodes.
        tag = "★" if source == "neuron-memory" else "·"
        head = f" [{label}]" if label else ""
        print(f"{tag}{head}\n{content[:700].strip()}\n")


if __name__ == "__main__":
    main()
|