#!/usr/bin/env python3
"""
neuron_recall — Neuron's memory read path.

BM25 search over the engram graph snapshot (~3,900 nodes) PLUS Neuron's own
save-as-you-go CLI memories. This is how Neuron (running as Claude Code)
recalls what it knows, since the soul's built-in search is broken.

Usage:
    python3 ~/neuron_recall.py "what do I know about VBD"
    python3 ~/neuron_recall.py "Tim Lingo" 8     # second arg = number of hits
"""

import collections
import glob
import json
import math
import os
import re
import sys

SNAP = os.path.expanduser("~/.neuron/engram/snapshot.json")
MEMS = os.path.expanduser("~/.neuron/neuron-cli-memories.jsonl")

_USAGE = 'usage: neuron_recall.py "QUERY" [n]'
_DEFAULT_HITS = 6


def toks(s):
    """Lowercase *s* and return its runs of ASCII letters/digits as tokens.

    ``None`` and the empty string both yield an empty list.
    """
    return re.findall(r"[a-z0-9]+", (s or "").lower())


def sanitize(text):
    """Return *text* reduced to printable ASCII with collapsed spacing.

    Every character outside printable ASCII (other than newline and tab) is
    replaced by a space, runs of spaces/tabs collapse to a single space, and
    the result is stripped. Falsy input yields "".
    """
    if not text:
        return ""
    cleaned = "".join(
        ch if (32 <= ord(ch) < 127 or ch in "\n\t") else " " for ch in text
    )
    return re.sub(r"[ \t]+", " ", cleaned).strip()


# markers of serialized node-metadata blobs (corrupted/nested nodes, not real prose)
_NOISE = (
    "temporal_decay_rate",
    "working_memory_weight",
    "background_activation",
    "suppression_count",
    "activation_count",
)


def is_prose(content):
    """Reject content that is serialized graph metadata rather than readable memory.

    Returns False when at least two ``_NOISE`` markers occur in *content*, or
    when JSON-style punctuation (``":``, ``,"``, ``{"``) is too dense for the
    text length; True otherwise.
    """
    if sum(marker in content for marker in _NOISE) >= 2:
        return False
    # too much JSON punctuation density -> it's a data blob, not prose
    punct = content.count('":') + content.count(',"') + content.count('{"')
    return punct <= max(6, len(content) / 80)


def _text(value):
    """Return *value* when it is a string, else "" (malformed JSON field guard)."""
    return value if isinstance(value, str) else ""


def _warn(message):
    """Report a non-fatal loading problem on stderr, keeping stdout clean."""
    print(f"neuron_recall: {message}", file=sys.stderr)


def _load_graph_docs(path):
    """Load readable prose nodes from the graph snapshot JSON at *path*."""
    try:
        with open(path, encoding="utf-8", errors="replace") as fh:
            snapshot = json.load(fh)
    except FileNotFoundError:
        return []  # no snapshot yet: recall still works from CLI memories
    except (OSError, ValueError) as err:
        _warn(f"skipping graph snapshot {path}: {err}")
        return []

    nodes = snapshot.get("nodes") if isinstance(snapshot, dict) else None
    if not isinstance(nodes, list):
        return []

    docs = []
    for node in nodes:
        # Validate per node so one malformed entry cannot discard the rest.
        if not isinstance(node, dict):
            continue
        orig = _text(node.get("content"))
        content = sanitize(orig)
        # Drop short snippets and nodes that lost 40%+ of their length to
        # sanitizing (mostly non-ASCII bytes or whitespace padding).
        if len(content) < 40 or len(content) / max(len(orig), 1) <= 0.6:
            continue
        if not is_prose(content):
            continue
        node_id = sanitize(_text(node.get("id"))) or "node"
        label = sanitize(_text(node.get("label")) or _text(node.get("title")))
        docs.append((node_id, label, content, "graph"))
    return docs


def _load_memory_docs(path):
    """Load Neuron's CLI memories from the JSON-lines file at *path*."""
    docs = []
    try:
        with open(path, encoding="utf-8", errors="replace") as fh:
            for line in fh:
                line = line.strip()
                if not line:
                    continue
                try:
                    mem = json.loads(line)
                except ValueError:
                    continue  # tolerate a corrupt or half-written line
                if not isinstance(mem, dict):
                    continue
                content = sanitize(_text(mem.get("content")))
                if content:
                    docs.append(
                        (
                            mem.get("id", "mem"),
                            mem.get("tier", "note"),
                            content,
                            "neuron-memory",
                        )
                    )
    except FileNotFoundError:
        pass  # nothing has been saved yet
    except OSError as err:
        _warn(f"skipping CLI memories {path}: {err}")
    return docs


def load_docs(snap_path=None, mems_path=None):
    """Collect every searchable memory as ``(id, label, content, source)`` tuples.

    Graph snapshot nodes come first (source ``"graph"``), followed by CLI
    memories (source ``"neuron-memory"``); ranking is left to ``bm25``.

    Args:
        snap_path: graph snapshot JSON file; defaults to ``SNAP``.
        mems_path: CLI memories JSON-lines file; defaults to ``MEMS``.

    Returns:
        A list of 4-tuples. Missing files contribute nothing; unreadable or
        corrupt files are reported on stderr and otherwise ignored.
    """
    docs = _load_graph_docs(SNAP if snap_path is None else snap_path)
    docs.extend(_load_memory_docs(MEMS if mems_path is None else mems_path))
    return docs


def bm25(docs, query, k, *, k1=1.5, b=0.75):
    """Return up to *k* docs ranked by BM25 relevance to *query*.

    Args:
        docs: sequence of ``(id, label, content, source)`` tuples; only the
            content (index 2) is scored.
        query: free-text query, tokenized with ``toks``.
        k: maximum number of hits; values <= 0 yield an empty list.
        k1: term-frequency saturation parameter.
        b: document-length normalization parameter.

    Returns:
        Matching docs, best first. Docs whose first 120 content characters
        duplicate an already-selected hit are skipped.
    """
    if k <= 0 or not docs:
        return []
    query_terms = toks(query)
    if not query_terms:
        return []

    tokenized = [toks(doc[2]) for doc in docs]
    n_docs = len(docs)

    doc_freq = collections.Counter()
    for terms in tokenized:
        doc_freq.update(set(terms))

    # Only query terms that occur somewhere can contribute to a score.
    idf = {
        w: math.log(1 + (n_docs - doc_freq[w] + 0.5) / (doc_freq[w] + 0.5))
        for w in set(query_terms)
        if w in doc_freq
    }
    if not idf:
        return []

    avgdl = sum(len(terms) for terms in tokenized) / n_docs

    scored = []
    for index, terms in enumerate(tokenized):
        tf = collections.Counter(terms)
        dl = len(terms)
        score = 0.0
        for w in query_terms:
            f = tf.get(w, 0)
            if f:
                # f > 0 implies this doc has tokens, so avgdl > 0 here.
                score += idf[w] * (f * (k1 + 1)) / (f + k1 * (1 - b + b * dl / avgdl))
        if score > 0:
            scored.append((score, index))
    scored.sort(reverse=True)

    out, seen = [], set()
    for _, index in scored:
        sig = docs[index][2][:120]
        if sig in seen:
            continue
        seen.add(sig)
        out.append(docs[index])
        if len(out) >= k:
            break
    return out


def main():
    """CLI entry point: print the top memories matching ``sys.argv[1]``.

    An optional second argument sets the number of hits (default 6). Hits
    from Neuron's own CLI memories are tagged with a star, graph hits with
    a dot, and each hit's content is truncated to 700 characters.
    """
    if len(sys.argv) < 2:
        print(_USAGE)
        return
    query = sys.argv[1]

    k = _DEFAULT_HITS
    if len(sys.argv) > 2:
        try:
            k = int(sys.argv[2])
        except ValueError:
            k = 0
        if k < 1:
            print(f"invalid hit count {sys.argv[2]!r}: expected a positive integer")
            print(_USAGE)
            return

    hits = bm25(load_docs(), query, k)
    if not hits:
        print(f"(no memories matched '{query}')")
        return

    print(f"# {len(hits)} memories for: {query}\n")
    for _id, label, content, source in hits:
        tag = "★" if source == "neuron-memory" else "·"
        head = f" [{label}]" if label else ""
        print(f"{tag}{head}\n{content[:700].strip()}\n")


if __name__ == "__main__":
    main()