diff --git a/engram/dist/engram b/engram/dist/engram index 84d2f60..f18d6c4 100755 Binary files a/engram/dist/engram and b/engram/dist/engram differ diff --git a/engram/dist/engram.c b/engram/dist/engram.c index 8f1fb67..82bafe8 100644 --- a/engram/dist/engram.c +++ b/engram/dist/engram.c @@ -130,8 +130,8 @@ el_val_t bm25_search_json(el_val_t query, el_val_t limit) { if (scan_limit < 200) { scan_limit = 200; } - if (scan_limit > 500) { - scan_limit = 500; + if (scan_limit > 5000) { + scan_limit = 5000; } el_val_t nodes_json = engram_scan_nodes_json(scan_limit, 0); el_val_t n = json_array_len(nodes_json); diff --git a/engram/src/server.el b/engram/src/server.el index a3aafe3..ba8bcef 100644 --- a/engram/src/server.el +++ b/engram/src/server.el @@ -123,9 +123,15 @@ fn bm25_score_doc(doc_content: String, query_tokens: String, corpus_size: Int, a fn bm25_search_json(query: String, limit: Int) -> String { // 1. Determine scan size: floor at 200 so small `limit` values still scan // enough of the corpus to find relevant nodes. + // Cap raised from 500 → 5000 (2026-05-24 self-review): 500 was 0.3% of the + // 161K-node corpus. At 5000 we cover the top ~3% by salience — still fast + // (pure C scan, no Ollama calls) with a 10x larger scan window for content search. + // engram_scan_nodes_json returns nodes sorted by salience DESC, so ISEs + // (salience 0.3) naturally fall below Knowledge/Memory (0.5–0.8), keeping + // the effective search corpus content-dense. let scan_limit: Int = limit * 10 if scan_limit < 200 { let scan_limit = 200 } - if scan_limit > 500 { let scan_limit = 500 } + if scan_limit > 5000 { let scan_limit = 5000 } // 2. Fetch node sample let nodes_json: String = engram_scan_nodes_json(scan_limit, 0) @@ -753,8 +759,9 @@ fn route_neuron_config(method: String, path: String, body: String) -> String { // route_neuron_state_events — GET lists ISEs, POST logs a new one.
// GET supports ?limit=N&offset=M for pagination; ?label=X to extract label // from the ISE content's "event" field. -// Use a high offset to skip to recent ISEs (ISEs fill quickly and sort by -// salience then insertion order — older entries dominate the front of scans). +// ISEs sort by created_at DESC (most-recent-first) as of 2026-05-23 fix. +// ?limit=10 returns the 10 most recent ISEs. Use offset for pagination, not for +// skipping to recent events (that was the pre-fix behavior; no longer needed). fn route_neuron_state_events(method: String, path: String, body: String) -> String { if str_eq(method, "GET") { let limit_str: String = query_param(path, "limit") diff --git a/lang/releases/v1.0.0-20260501/el_runtime.c b/lang/releases/v1.0.0-20260501/el_runtime.c index 348bc05..1668f6f 100644 --- a/lang/releases/v1.0.0-20260501/el_runtime.c +++ b/lang/releases/v1.0.0-20260501/el_runtime.c @@ -6938,18 +6938,34 @@ el_val_t engram_activate(el_val_t query, el_val_t depth) { /* ── TRAVERSAL INFERENCE: infer A→C edges when A→B→C was traversed ── * For each pair of edges (A→B, B→C) where all three nodes were reached, * create an inferred A→C edge with weight = w(A→B) * w(B→C) * 0.8 - * if no A→C edge already exists. Cap at 256 new edges per call. + * if no A→C edge already exists. Cap at 32 new edges per call. + * + * Cap reduced 256→32 (2026-05-24 self-review): the soul daemon's proactive + * curiosity runs engram_activate every ~30s. At 256 edges/call, the in-process + * edge store grew from 21K → 107K in 23h — a 5× BFS slowdown. The inner + * "check if A→C already exists" loop is O(edge_count) per candidate, so cost + * scales as O(edge_count²) as the graph densifies. Reducing to 32 gives the + * same latent-path benefit at 1/8 the accumulation rate. + * + * Edge count guard: skip inference entirely when the graph already has + * ≥ 40,000 edges.
At that density, most 2-hop A→C paths are already + * explicit (either persisted or inferred in earlier calls), so marginal + * inference value drops sharply while the O(edge_count²) scan cost stays high. + * This caps otherwise unbounded accumulation across restarts. * * IMPORTANT: we collect candidate edges FIRST (snapshot the edge count and * copy the needed IDs/weights), then apply them AFTER — this avoids * dangling pointer bugs from realloc inside the scan loop. */ { - const int64_t INFER_CAP = 256; + const int64_t INFER_CAP = 32; typedef struct { char from[64]; char to[64]; double weight; } InferCandidate; InferCandidate* cands = malloc((size_t)INFER_CAP * sizeof(InferCandidate)); int64_t ncands = 0; /* Snapshot edge count so we only scan pre-existing edges. */ int64_t snap_ec = g->edge_count; + /* Skip inference on dense graphs: marginal value drops, O(edge_count²) cost stays. + * NULL cands → the if (cands) block below is skipped cleanly. */ + if (snap_ec >= 40000) { free(cands); cands = NULL; } if (cands) { for (int64_t e1 = 0; e1 < snap_ec && ncands < INFER_CAP; e1++) { EngramEdge* ea = &g->edges[e1];