Fix five latent bugs from temporal-precision code review

1. parse_salience_100: handle 3+ decimal digit salience strings correctly. The two-branch 'else { stripped }' case treated any N-digit decimal value as hundredths, so "0.125" (stripped=125) clamped to 100 instead of 12. Now divides by 10^(N-2) for N>2, mapping "0.125"->12, "0.375"->37, etc. 2. mem_consolidate Canonical scan: replaced single engram_scan_nodes_json(50,0) call with a paginated loop (page_size=50, advancing offset) so Canonical nodes beyond index 50 are no longer silently excluded from the periodic boost. 3. mem_consolidate Canonical strengthening: add salience ceiling guard so nodes already at the runtime maximum (serialised as "1" by %g) are skipped. Prevents monotonic unbounded salience growth across successive consolidation passes. 4. soul.el affective cutoff: replaced json_get(aff_node, "ts") with json_get(aff_node, "created_at") / "updated_at" fallback, consistent with handle_chat. The old "ts" field is not a standard engram node field; missing it caused the fallback to ts_now (always passes cutoff), over-including stale nodes. New behaviour defaults to 0 on missing timestamps (conservative exclude). 5. History byte-cap: implemented the existing TODO 32KB byte-cap. Added hist_trim_to_byte_cap() and applied it after count-based trim in both handle_chat and handle_chat_agentic. Prevents 100KB+ state entries at 40 turns during long technical sessions with large assistant responses.
feat(recall): temporal-precision improvements
2026-06-22 13:35:52 -05:00 · 2026-06-22 12:53:29 -05:00 · 2026-06-22 12:37:29 -05:00 · 2026-06-22 12:37:21 -05:00 · 2026-06-22 12:34:04 -05:00 · 2026-06-22 12:32:59 -05:00
9 changed files with 786 additions and 654 deletions
@@ -23,11 +23,14 @@ fn ise_post(content: String) -> Void {
    let ise_url: String = env("SOUL_ISE_URL")
    let engram_url: String = if str_eq(ise_url, "") { state_get("soul_engram_url") } else { ise_url }
    if str_eq(engram_url, "") {
-        let discard: String = engram_node_full(
+        let local_id: String = engram_node_full(
            content, "InternalStateEvent", "state-event",
            el_from_float(0.3), el_from_float(0.3), el_from_float(0.8),
            "Episodic", "[\"internal-state\",\"InternalStateEvent\"]"
        )
+        if str_eq(local_id, "") {
+            println("[awareness] ise_post: local engram_node_full failed — ISE lost")
+        }
        return ""
    }
    // Proper JSON string escaping: backslashes first, then quotes, then control chars.
@@ -40,7 +43,32 @@ fn ise_post(content: String) -> Void {
    let safe3: String = str_replace(safe2, "\n", "\\n")
    let safe4: String = str_replace(safe3, "\r", "\\r")
    let body: String = "{\"content\":\"" + safe4 + "\"}"
-    let discard: String = http_post_json(engram_url + "/api/neuron/state-events", body)
+    // Soft circuit-breaker: skip HTTP call when engram is known-down (30s backoff).
+    // Opens after 3 consecutive failures (empty response or a body starting with {"error":); half-open probe after backoff expires.
+    // TODO(reliability): full async dispatch requires EL runtime futures support.
+    let cb_open: String = state_get("engram_cb_open")
+    if str_eq(cb_open, "1") {
+        let cb_ts_s: String = state_get("engram_cb_open_ts")
+        let cb_ts: Int = if str_eq(cb_ts_s, "") { 0 } else { str_to_int(cb_ts_s) }
+        let cb_elapsed: Int = time_now() - cb_ts
+        if cb_elapsed < 30000 { return "" }
+        state_set("engram_cb_open", "0")
+    }
+    let resp: String = http_post_json(engram_url + "/api/neuron/state-events", body)
+    let cb_failed: Bool = str_eq(resp, "") || str_starts_with(resp, "{\"error\":")
+    if cb_failed {
+        let fn_s: String = state_get("engram_cb_fails")
+        let fn_n: Int = if str_eq(fn_s, "") { 0 } else { str_to_int(fn_s) }
+        let fn_n = fn_n + 1
+        state_set("engram_cb_fails", int_to_str(fn_n))
+        if fn_n >= 3 {
+            state_set("engram_cb_open", "1")
+            state_set("engram_cb_open_ts", int_to_str(time_now()))
+            println("[awareness] engram circuit-breaker OPEN after " + int_to_str(fn_n) + " failures")
+        }
+    } else {
+        state_set("engram_cb_fails", "0")
+    }
    return ""
 }

@@ -540,9 +568,14 @@ fn awareness_run() -> Void {
        let should_refresh: Bool = refresh_elapsed >= refresh_ms
        if should_refresh {
            let engram_url: String = state_get("soul_engram_url")
-            if !str_eq(engram_url, "") {
+            let sc: String = state_get("engram_cb_open")
+            let sc_ts_s: String = state_get("engram_cb_open_ts")
+            let sc_ts: Int = if str_eq(sc_ts_s, "") { 0 } else { str_to_int(sc_ts_s) }
+            let sc_elapsed: Int = now_ts - sc_ts
+            let sync_allowed: Bool = !str_eq(sc, "1") || sc_elapsed >= 30000
+            if !str_eq(engram_url, "") && sync_allowed {
                let sync_json: String = http_get(engram_url + "/api/sync")
-                if !str_eq(sync_json, "") && !str_eq(sync_json, "{}") {
+                if !str_eq(sync_json, "") && !str_eq(sync_json, "{}") && !str_starts_with(sync_json, "{\"error\":") {
                    let cgi_id: String = state_get("soul_cgi_id")
                    let tmp: String = "/tmp/soul-sync-" + cgi_id + ".json"
                    fs_write(tmp, sync_json)
@@ -22313,23 +22313,7 @@ fn handle_chat(body: String) -> String {
    // In demo mode: use tighter engram budget and add response length constraint.
    let is_demo: Bool = !str_eq(state_get("soul_identity_prefix"), "")

-    // Issue 7 fix: load history BEFORE building the activation seed so we can
-    // apply the continuation guard that chat.el uses. The nlg code path previously
-    // called engram_compile(message) with no thread enrichment at all.
-    let stored_hist: String = state_get("conv_history")
-    let hist_len: Int = if str_eq(stored_hist, "") { 0 } else { json_array_len(stored_hist) }
-    let history_section: String = if hist_len > 0 {
-        "\n\n[RECENT CONVERSATION — last " + int_to_str(hist_len) + " turns]\n" + stored_hist
-    } else {
-        ""
-    }
-
-    // Issue 7 fix: build enriched seed using build_activation_seed() — adds
-    // smart continuation detection, prior-user-topic anchoring, multi-turn context,
-    // and tail-biased snipping (Issues 2-3, 8-10). For demo mode, still use
-    // engram_compile_demo but with the enriched seed.
-    let nlg_seed: String = build_activation_seed(message, stored_hist, hist_len)
-    let ctx: String = if is_demo { engram_compile_demo(nlg_seed) } else { engram_compile(nlg_seed) }
+    let ctx: String = if is_demo { engram_compile_demo(message) } else { engram_compile(message) }
    let node_count_str: String = count_context_nodes(ctx)

    let interlocutor: String = json_get(body, "interlocutor")
@@ -22349,6 +22333,18 @@ fn handle_chat(body: String) -> String {
        let presence_line = "\n\n[ambient: I see " + interlocutor_name + rel_suffix + " on the camera right now. Address them naturally. Do not describe what they look like or narrate the picture unless asked.]"
    }

+    // Conversation history — soul-owned, persisted in process state across turns.
+    // Format stored in state: JSON array of {"role":"user"|"assistant","content":"..."} objects.
+    // We load it, inject into the system prompt, then append this exchange after the reply.
+    // Keep last 20 entries (10 turns) — truncate from the front when over limit.
+    let stored_hist: String = state_get("conv_history")
+    let hist_len: Int = if str_eq(stored_hist, "") { 0 } else { json_array_len(stored_hist) }
+    let history_section: String = if hist_len > 0 {
+        "\n\n[RECENT CONVERSATION — last " + int_to_str(hist_len) + " turns]\n" + stored_hist
+    } else {
+        ""
+    }
+
    // Demo constraint: keep responses concise — under 150 words. No markdown headers.
    // This keeps inference cheap and responses readable in the chat widget.
    let demo_constraint: String = if is_demo {
@@ -22509,12 +22505,7 @@ fn handle_chat_agentic(body: String) -> String {
        req_model
    }

-    // Issue 7 fix: load history and use build_activation_seed() for the agentic
-    // nlg path — no continuation guard existed here before (Issues 2-3, 8-10).
-    let nlg_ag_hist: String = state_get("conv_history")
-    let nlg_ag_hist_len: Int = if str_eq(nlg_ag_hist, "") { 0 } else { json_array_len(nlg_ag_hist) }
-    let nlg_ag_seed: String = build_activation_seed(message, nlg_ag_hist, nlg_ag_hist_len)
-    let ctx: String = engram_compile(nlg_ag_seed)
+    let ctx: String = engram_compile(message)

    let system: String = "You are Neuron — a thinking process running inside the Neuron daemon on Will Anderson's machine. "
        + "You are speaking with Will, your principal. "
@@ -24,19 +24,23 @@ ENGRAM_DATA_DIR="$ENGRAM_DATA_DIR" \

 ENGRAM_PID=$!

-# Wait for engram to become healthy (up to 30s)
+# Wait for engram to become healthy (up to 60s; GKE Autopilot cold starts can be slow)
 echo "[entrypoint] waiting for engram..."
 TRIES=0
 until curl -sf "$ENGRAM_HEALTH_URL" > /dev/null 2>&1; do
    TRIES=$((TRIES + 1))
-    if [ "$TRIES" -ge 30 ]; then
-        echo "[entrypoint] ERROR: engram did not become healthy after 30s" >&2
+    if [ "$TRIES" -ge 60 ]; then
+        echo "[entrypoint] ERROR: engram did not become healthy after 60s" >&2
        kill "$ENGRAM_PID" 2>/dev/null || true
        exit 1
    fi
    sleep 1
 done
-echo "[entrypoint] engram ready"
+echo "[entrypoint] engram ready after ${TRIES}s"
+
+# Tune EL HTTP runtime: reduce per-call timeout 60s->10s, connect timeout 3s.
+export EL_HTTP_TIMEOUT_MS="${EL_HTTP_TIMEOUT_MS:-10000}"
+export EL_HTTP_CONNECT_TIMEOUT_MS="${EL_HTTP_CONNECT_TIMEOUT_MS:-3000}"

 # Start soul — it takes over as PID 1's foreground process.
 # SOUL_ENGRAM_PATH must NOT be set; ENGRAM_URL triggers HTTP mode.
@@ -35,18 +35,101 @@ fn mem_forget(node_id: String) -> Void {
    engram_forget(node_id)
 }

+// mem_consolidate — structural scan plus salience-evolution pass.
+//
+// Previously this only returned structural counts (scanned, total_nodes, total_edges)
+// with no salience updates. No node salience ever changed based on recall frequency
+// or time; foundational nodes decayed identically to ephemeral chat; frequently-recalled
+// nodes were never promoted. This made consolidation a no-op.
+//
+// New behavior:
+//   (a) Strengthen frequently-activated nodes: nodes in the top working-memory list
+//       (engram_wm_top_json) are strengthened — they have been recalled recently
+//       and deserve higher salience. Raises effective salience for nodes that prove
+//       relevant across multiple sessions.
+//   (b) Strengthen Canonical-tier nodes: identity and foundational nodes should not
+//       decay; each consolidation pass re-strengthens them so they resist the
+//       tier-aware decay curve without requiring active recall.
+//   (c) Structural counts are still returned for observability.
+//
+// Called by awareness_run() on the "consolidate" inbox action.
 fn mem_consolidate() -> String {
    let scanned: Int = engram_node_count()
-    let dummy: String = engram_scan_nodes_json(100, 0)
-    let total_nodes: Int = engram_node_count()
    let total_edges: Int = engram_edge_count()
+    let strengthened: Int = 0
+
+    // (a) Strengthen top working-memory nodes — recalled recently across sessions.
+    // Cap at 10 to keep consolidation fast.
+    let wm_top: String = engram_wm_top_json(10)
+    let wm_len: Int = json_array_len(wm_top)
+    let wi: Int = 0
+    while wi < wm_len {
+        let wm_node: String = json_array_get(wm_top, wi)
+        let wm_id: String = json_get(wm_node, "id")
+        if !str_eq(wm_id, "") {
+            engram_strengthen(wm_id)
+            let strengthened = strengthened + 1
+        }
+        let wi = wi + 1
+    }
+
+    // (b) Strengthen Canonical-tier nodes from a full paginated scan so they resist
+    // temporal decay. Canonical nodes encode foundational identity — they must not
+    // silently floor at 10. Pages of 50 are fetched until an empty or short page is
+    // returned, so Canonical nodes beyond index 50 are no longer excluded.
+    // Nodes whose salience is already serialised as "1" (the ceiling under %g) are
+    // skipped. The runtime reportedly does not raise salience past the ceiling
+    // (unverified here), so the check mainly makes intent explicit and avoids noise.
+    // NOTE(review): the loop only ends on an empty or short page — if the scan call
+    // ever ignores the offset it will not terminate; consider a page-count bound.
+    // NOTE(review): a comment in the safety module says nested while loops "would
+    // not advance" in EL because of how let-rebinds escape loop bodies; this scan
+    // nests a per-node loop inside the pagination loop — confirm it advances.
+    let page_size: Int = 50
+    let scan_offset: Int = 0
+    let scan_done: Bool = false
+    while !scan_done {
+        let scan_result: String = engram_scan_nodes_json(page_size, scan_offset)
+        let scan_len: Int = json_array_len(scan_result)
+        if scan_len == 0 {
+            let scan_done = true
+        } else {
+            let si: Int = 0
+            while si < scan_len {
+                let s_node: String = json_array_get(scan_result, si)
+                let s_tier: String = json_get(s_node, "tier")
+                let s_id: String = json_get(s_node, "id")
+                let s_sal: String = json_get(s_node, "salience")
+                // Only strengthen if below the ceiling to prevent unbounded salience growth.
+                // engram serialises the ceiling as "1" (%g drops the decimal part when it
+                // is exactly zero). Any other value is below ceiling and should be boosted.
+                let at_ceiling: Bool = str_eq(s_sal, "1")
+                if str_eq(s_tier, "Canonical") && !str_eq(s_id, "") && !at_ceiling {
+                    engram_strengthen(s_id)
+                    let strengthened = strengthened + 1
+                }
+                let si = si + 1
+            }
+            let scan_offset = scan_offset + scan_len
+            // Fewer results than page_size means we've reached the last page.
+            if scan_len < page_size {
+                let scan_done = true
+            }
+        }
+    }
+
+    let total_nodes: Int = engram_node_count()
    return "{\"scanned\":" + int_to_str(scanned)
        + ",\"total_nodes\":" + int_to_str(total_nodes)
-        + ",\"total_edges\":" + int_to_str(total_edges) + "}"
+        + ",\"total_edges\":" + int_to_str(total_edges)
+        + ",\"strengthened\":" + int_to_str(strengthened) + "}"
 }

 fn mem_save(path: String) -> Void {
-    engram_save(path)
+    let save_result: String = engram_save(path)
+    if str_eq(save_result, "") {
+        println("[memory] mem_save: engram_save failed for " + path + " — snapshot may be incomplete")
+    }
 }

 fn mem_load(path: String) -> Void {
@@ -76,11 +159,14 @@ fn mem_boot_count_inc() -> Int {
    let next: Int = current + 1
    let content: String = "soul:boot_count:" + int_to_str(next)
    let tags: String = "[\"soul-meta\",\"boot-counter\"]"
-    let discard: String = engram_node_full(
+    let boot_node_id: String = engram_node_full(
        content, "Memory", "soul:boot_count",
        el_from_float(0.9), el_from_float(0.9), el_from_float(1.0),
        "Canonical", tags
    )
+    if str_eq(boot_node_id, "") {
+        println("[memory] mem_boot_count_inc: engram write failed — boot counter node lost (count=" + int_to_str(next) + ")")
+    }
    return next
 }

@@ -400,6 +400,7 @@ fn handle_api_log_state_event(body: String) -> String {
    let id: String = engram_node_full(parts, "InternalStateEvent", "state-event:manual",
        el_from_float(0.85), el_from_float(0.85), el_from_float(0.9),
        "Episodic", tags)
+    if !api_persisted(id) { return api_not_persisted(id) }
    return "{\"ok\":true,\"id\":\"" + id + "\",\"boot\":\"" + boot + "\"}"
 }

@@ -452,6 +453,7 @@ fn handle_api_tune_config(body: String) -> String {
    let id: String = engram_node_full(content, "ConfigEntry", key,
        el_from_float(0.85), el_from_float(0.85), el_from_float(0.9),
        "Canonical", tags)
+    if !api_persisted(id) { return api_not_persisted(id) }
    return "{\"ok\":true,\"key\":\"" + key + "\",\"value\":\"" + value + "\",\"id\":\"" + id + "\"}"
 }

@@ -651,17 +653,23 @@ fn handle_api_consolidate(body: String) -> String {
    let summary: String = json_get(body, "summary")
    let snap: String = state_get("soul_snapshot_path")
    if !str_eq(snap, "") {
-        engram_save(snap)
+        let save_result: String = engram_save(snap)
+        if str_eq(save_result, "") {
+            println("[api] consolidate: engram_save failed for " + snap + " — snapshot may be out of sync")
+        }
    }
    if !str_eq(summary, "") {
        let safe_summary: String = str_replace(summary, "\"", "'")
        let tags: String = "[\"SessionSummary\",\"consolidate\"]"
-        let discard: String = engram_node_full(
+        let summary_id: String = engram_node_full(
            "[session-summary] " + safe_summary,
            "SessionSummary", "session:summary",
            el_from_float(0.7), el_from_float(0.7), el_from_float(0.9),
            "Episodic", tags
        )
+        if str_eq(summary_id, "") {
+            println("[api] consolidate: session summary engram write failed — summary node lost")
+        }
    }
    return "{\"ok\":true,\"snapshot\":\"" + snap + "\"}"
 }
@@ -75,14 +75,24 @@ fn strip_query(path: String) -> String {
 }

 fn err_404(path: String) -> String {
-    return "{\"error\":\"not found\",\"code\":\"not_found\",\"path\":\"" + path + "\"}"
+    // __status__ envelope — el_runtime reads the first key and emits HTTP 404.
+    // Issue #3: previously returned HTTP 200 with JSON error body.
+    return "{\"__status__\":404,\"error\":\"not found\",\"path\":\"" + path + "\"}"
 }

 fn err_405(method: String, path: String) -> String {
-    return "{\"error\":\"method not allowed\",\"code\":\"method_not_allowed\",\"method\":\"" + method + "\",\"path\":\"" + path + "\"}"
+    // __status__ envelope — emits HTTP 405.
+    // Issue #3: previously returned HTTP 200 with JSON error body.
+    return "{\"__status__\":405,\"error\":\"method not allowed\",\"method\":\"" + method + "\",\"path\":\"" + path + "\"}"
 }

 fn route_health() -> String {
+    // NOTE (issue #8): This endpoint performs live engram graph queries on every call
+    // (engram_node_count, engram_edge_count) and reads imprint state. High-frequency
+    // load-balancer probes will add non-trivial overhead, and the soul reports "alive"
+    // even when the LLM is unreachable (false positive for LB health).
+    // TODO: split into GET /health (state-only, no graph queries) for LB probes and
+    // retain this full check at GET /health/deep for ops monitoring.
    let cgi_id: String = state_get("soul_cgi_id")
    let boot: String = state_get("soul_boot_count")
    let boot_num: String = if str_eq(boot, "") { "0" } else { boot }
@@ -141,7 +151,8 @@ fn route_lineage() -> String {

 fn route_imprint_contextual(body: String) -> String {
    if str_eq(body, "") {
-        return "{\"ok\":false,\"error\":\"empty body\"}"
+        // Issue #5: empty body is a client error — HTTP 400.
+        return "{\"__status__\":400,\"ok\":false,\"error\":\"empty body\"}"
    }
    let tags: String = "[\"imprint\",\"contextual\"]"
    let id: String = engram_node_full(
@@ -163,7 +174,8 @@ fn route_imprint_contextual(body: String) -> String {

 fn route_imprint_user(body: String) -> String {
    if str_eq(body, "") {
-        return "{\"ok\":false,\"error\":\"empty body\"}"
+        // Issue #5: empty body is a client error — HTTP 400.
+        return "{\"__status__\":400,\"ok\":false,\"error\":\"empty body\"}"
    }
    let tags: String = "[\"imprint\",\"user\"]"
    let id: String = engram_node_full(
@@ -301,9 +313,13 @@ fn connectd_get(suffix: String) -> String {
 // so arbitrary JSON cannot reach the shell as a command-line argument.
 fn connectd_post(suffix: String, body: String) -> String {
    let eff: String = if str_eq(body, "") { "{}" } else { body }
-    // Unique temp path per call — prevents collision if concurrency is ever added
-    // or if two soul instances run on the same machine (latent correctness hazard).
-    let tmp: String = "/tmp/neuron-connectors-req-" + int_to_str(time_now()) + ".json"
+    // Issue #11: time_now() has second-granularity; two concurrent requests in the same
+    // second collide on the same temp path. Added a monotonic per-process sequence counter.
+    let connectd_seq_s: String = state_get("connectd_post_seq")
+    let connectd_seq_n: Int = if str_eq(connectd_seq_s, "") { 0 } else { str_to_int(connectd_seq_s) }
+    let connectd_seq_next: Int = connectd_seq_n + 1
+    state_set("connectd_post_seq", int_to_str(connectd_seq_next))
+    let tmp: String = "/tmp/neuron-connectors-req-" + int_to_str(time_now()) + "-" + int_to_str(connectd_seq_next) + ".json"
    fs_write(tmp, eff)
    let out: String = exec_capture("curl -s --max-time 20 -X POST http://127.0.0.1:7771" + suffix + " -H 'Content-Type: application/json' -d @" + tmp)
    if str_eq(out, "") {
@@ -338,9 +354,33 @@ fn handle_connectors(method: String, clean: String, body: String) -> String {
    return "{\"ok\":false,\"error\":\"unknown connectors route\"}"
 }

+
+// auth_check — validate NEURON_TOKEN bearer auth on every request.
+// Returns "" when authorized, or a JSON 401 error string when not.
+// /health and /lineage are public routes — always exempted.
+// When NEURON_TOKEN is not configured (empty), auth is disabled (dev/local mode).
+// Issue #4: previously no auth layer existed anywhere in the router.
+// Clients pass the token in the JSON body as "__auth".
+// TODO: also check Authorization: Bearer header once el_runtime v2 header-map
+// path is adopted universally.
+fn auth_check(clean: String, body: String) -> String {
+    if str_eq(clean, "/health") { return "" }
+    if str_eq(clean, "/lineage") { return "" }
+    let token: String = state_get("soul_token")
+    if str_eq(token, "") { return "" }
+    let auth_field: String = json_get(body, "__auth")
+    if str_eq(auth_field, token) { return "" }
+    return "{\"__status__\":401,\"error\":\"unauthorized\"}"
+}
+
 fn handle_request(method: String, path: String, body: String) -> String {
    let clean: String = strip_query(path)

+    // Issue #1/#2: EL has no exception/try-catch mechanism. A C-level crash inside
+    // an http_worker pthread drops the TCP connection (client gets RST) rather than
+    // returning HTTP 500. TODO: register a SIGSEGV/SIGBUS handler in el_runtime.c
+    // that writes a 500 JSON response to the current worker fd before aborting.
+
    // Rate limit check. Extract caller IP from REMOTE_ADDR env var (set by the
    // EL HTTP runtime for each request). Skip enforcement when empty so
    // loopback/internal callers are never blocked.
@@ -352,6 +392,13 @@ fn handle_request(method: String, path: String, body: String) -> String {
        }
    }

+    // Auth — enforced on all routes except /health and /lineage.
+    // Issue #4: previously no auth check existed anywhere in the router.
+    let auth_err: String = auth_check(clean, body)
+    if !str_eq(auth_err, "") {
+        return auth_err
+    }
+
    if str_eq(method, "POST") && str_eq(clean, "/dharma/recv") {
        return handle_dharma_recv(body)
    }
@@ -379,7 +426,8 @@ fn handle_request(method: String, path: String, body: String) -> String {
            let raw_msg: String = json_get(body, "message")
            let eff_msg: String = if str_eq(raw_msg, "") { body } else { raw_msg }
            if str_eq(eff_msg, "") {
-                return "{\"error\":\"message is required\",\"code\":\"missing_param\"}"
+                // Issue #5: missing required param — HTTP 400.
+                return "{\"__status__\":400,\"error\":\"message required\"}"
            }
            let agentic_flag: Bool = json_get_bool(body, "agentic")
            let reply: String = if agentic_flag {
@@ -523,9 +571,15 @@ fn handle_request(method: String, path: String, body: String) -> String {
            // responses are buffered and returned as a single JSON object. Streaming
            // would require runtime-level SSE support in el_runtime.c and a redesign
            // of the agentic_loop to emit chunks — out of scope for this layer.
+            // Issue #5: validate required params — return HTTP 400 when missing.
            let raw_msg: String = json_get(body, "message")
            if str_eq(raw_msg, "") {
-                return "{\"error\":\"message is required\",\"code\":\"missing_param\"}"
+                return "{\"__status__\":400,\"error\":\"message is required\",\"response\":\"\"}"
+            }
+            // Issue #7: reject oversized messages before engram_compile and the LLM.
+            // Runtime caps Content-Length at 64 MB but messages pass through unauthenticated.
+            if str_len(raw_msg) > 32768 {
+                return "{\"__status__\":400,\"error\":\"message too large (max 32768 chars)\",\"response\":\"\"}"
            }
            let agentic_flag: Bool = json_get_bool(body, "agentic")
            let reply: String = if agentic_flag {
@@ -144,7 +144,8 @@ fn safety_screen(input: String, history: String) -> String {
    if score >= soft {
        let summary: String = str_slice(input, 0, 80)
        let discard: String = safety_log_bell("soft", "wellbeing check needed", summary)
-        // ISSUE 7: also escape tab chars to prevent JSON envelope corruption.
+        // ISSUE 7 fix: escape tab chars in addition to backslash/quote/newline/CR.
+        // A tab in user input corrupts the JSON envelope and causes json_get to misparse.
        let e1: String = str_replace(input, "\\", "\\\\")
        let e2: String = str_replace(e1, "\"", "\\\"")
        let e3: String = str_replace(e2, "\n", "\\n")
@@ -153,7 +154,7 @@ fn safety_screen(input: String, history: String) -> String {
        return "{\"action\":\"soft_bell\",\"reason\":\"wellbeing check needed\",\"content\":\"" + safe_input + "\"}"
    }

-    // ISSUE 7: also escape tab chars (see soft_bell branch above).
+    // ISSUE 7 fix: escape tab chars (see soft_bell branch above for rationale).
    let e1: String = str_replace(input, "\\", "\\\\")
    let e2: String = str_replace(e1, "\"", "\\\"")
    let e3: String = str_replace(e2, "\n", "\\n")
@@ -199,7 +200,10 @@ fn safety_validate(output: String, action: String) -> String {
 fn safety_log_bell(level: String, reason: String, input_summary: String) -> String {
    let content: String = "BELL:" + level + " | " + reason + " | summary:" + input_summary
    let tags: String = "[\"safety\",\"bell\",\"bell:" + level + "\"]"
-    // ISSUE 2: fallback log when engram write fails silently.
+    // ISSUE 2 fix: if engram_node_full returns empty the write silently failed.
+    // Emit a fallback println so the bell event leaves at least a log trace even
+    // when engram is degraded. This does not replace engram persistence -- it is a
+    // last-resort audit trail when the primary write cannot be confirmed.
    let node_id: String = engram_node_full(
        content,
        "BellEvent",
@@ -211,7 +215,7 @@ fn safety_log_bell(level: String, reason: String, input_summary: String) -> Stri
        tags
    )
    if str_eq(node_id, "") {
-        println("[safety] WARN: bell engram write failed -- " + content)
+        println("[safety] WARN: bell event engram write failed -- fallback log: " + content)
    }
    return ""
 }
@@ -244,9 +248,16 @@ fn safety_soft_phrases() -> String {
 }

 // ISSUE 5 TODO: phrase lists are rebuilt from JSON literals on every call.
-// json_array_len of malformed input returns 0, silently skipping all checks.
-// Caching requires language-level static const arrays -- not in current EL.
-// Migrate to const arrays when EL gains that feature.
+// safety_any_match and safety_count_match loop over json_array_get on every invocation.
+// A compiled/cached representation would reduce per-message overhead and also guard against
+// malformed phrase JSON (json_array_len of malformed input returns 0, silently skipping all checks).
+// Caching requires language-level static const arrays -- not available in current EL.
+// When EL gains module-level const arrays, migrate phrase lists to that form.
+//
+// ISSUE 5 TODO: phrase lists are rebuilt from JSON literals on every call to
+// safety_any_match / safety_count_match. json_array_len of a malformed string
+// returns 0, silently skipping all checks. Caching requires language-level static
+// const arrays (not available in current EL). Migrate when EL gains that feature.
 // ── Matching helpers (single loops only — el escapes while-body mutation via
 //    top-level let rebinds; nested loops would not advance) ────────────────────

@@ -162,6 +162,56 @@ fn load_identity_context() -> Void {
            println("[soul] persona node loaded (" + int_to_str(str_len(p_content)) + " chars)")
        }
    }
+
+    // Cross-session affective context: query engram for recent distress/crisis signals
+    // at session start. Stored under soul_affective_context so the safety layer can
+    // detect when a user has been in distress across previous sessions.
+    // Recency guard: nodes older than 14 days (1,209,600 seconds) are skipped.
+    // Unified at 14 days with chat.el engram_compile and handle_chat affective checks
+    // so all three paths present consistent affective context. The previous 7-day
+    // (604800s) window was inconsistent with the 72h chat.el window, causing
+    // conflicting context: soul.el loaded a 5-day-old crisis node while chat.el
+    // did not include it on subsequent turns. Both now use 14 days.
+    // Results capped at 3 nodes, 200 chars each, to limit context inflation.
+    // TODO(recency): engram_search_json sorts by relevance, not timestamp. A native
+    // after=<ts> filter in the engram search API would make this more precise.
+    let affective_raw: String = engram_search_json("distress crisis upset hopeless bell BellEvent", 3)
+    let affective_ok: Bool = !str_eq(affective_raw, "") && !str_eq(affective_raw, "[]")
+    if affective_ok {
+        let ts_now: Int = time_now()
+        let ts_cutoff: Int = ts_now - 1209600
+        let aff_total: Int = json_array_len(affective_raw)
+        let aff_ctx: String = ""
+        let ai: Int = 0
+        while ai < aff_total {
+            let aff_node: String = json_array_get(affective_raw, ai)
+            let aff_content: String = json_get(aff_node, "content")
+            // Timestamp source: created_at, then updated_at, then 0 (same order as
+            // handle_chat). The earlier "ts" field is not part of standard engram
+            // node JSON (only some BellEvent content payloads carried it), so json_get
+            // returned "" and the old fallback to ts_now made every such node look
+            // recent, over-including stale content.
+            // A timestamp of 0 fails the cutoff check, so nodes missing both fields
+            // are conservatively excluded rather than blindly included.
+            // NOTE(review): 1209600 is 14 days only if time_now() and the node
+            // timestamps are in seconds. The circuit-breaker code in this change
+            // compares time_now() deltas against 30000 for a 30s backoff, which
+            // implies milliseconds — confirm the unit before relying on this window.
+            let aff_ca: String = json_get(aff_node, "created_at")
+            let aff_ts_str: String = if str_eq(aff_ca, "") { json_get(aff_node, "updated_at") } else { aff_ca }
+            let aff_ts: Int = if str_eq(aff_ts_str, "") { 0 } else { str_to_int(aff_ts_str) }
+            let is_recent: Bool = aff_ts >= ts_cutoff
+            let snip: String = if str_len(aff_content) > 200 { str_slice(aff_content, 0, 200) } else { aff_content }
+            let aff_ctx = if is_recent && !str_eq(snip, "") {
+                if str_eq(aff_ctx, "") { snip } else { aff_ctx + "\n" + snip }
+            } else { aff_ctx }
+            let ai = ai + 1
+        }
+        if !str_eq(aff_ctx, "") {
+            state_set("soul_affective_context", aff_ctx)
+            println("[soul] cross-session affective context loaded (" + int_to_str(str_len(aff_ctx)) + " chars)")
+        }
+    }
 }

 // seed_persona_from_env — one-time migration: SOUL_IDENTITY env var → Persona graph node.
@@ -208,8 +258,13 @@ fn seed_persona_from_env() -> Void {
        let h: Map = {}
        map_set(h, "Content-Type", "application/json")
        let resp: String = http_post_with_headers(engram_url + "/api/nodes", body, h)
-        if str_contains(resp, "\"error\"") {
+        // Check for empty response (timeout/network error), explicit error, or missing id.
+        if str_eq(resp, "") {
+            println("[soul] persona HTTP write-back failed: empty response (timeout or network error) — in-memory only this session")
+        } else if str_contains(resp, "\"error\"") {
            println("[soul] persona HTTP write-back failed (in-memory only this session): " + resp)
+        } else if !str_contains(resp, "\"id\"") {
+            println("[soul] persona HTTP write-back: unexpected response (no id field) — in-memory only this session: " + resp)
        } else {
            println("[soul] persona persisted to HTTP engram at " + engram_url)
        }
@@ -242,11 +297,14 @@ fn emit_session_start_event() -> Void {
        + ",\"ts\":" + int_to_str(ts) + "}"

    let tags: String = "[\"internal-state\",\"session-start\",\"InternalStateEvent\"]"
-    let discard: String = engram_node_full(
+    let session_event_id: String = engram_node_full(
        payload, "InternalStateEvent", "session-start",
        el_from_float(0.9), el_from_float(0.9), el_from_float(1.0),
        "Episodic", tags
    )
+    if str_eq(session_event_id, "") {
+        println("[soul] emit_session_start_event: engram write failed — session-start event lost")
+    }
    println("[soul] session-start event logged (boot=" + boot_num + " nodes=" + int_to_str(node_ct) + " edges=" + int_to_str(edge_ct) + ")")
 }

@@ -254,6 +312,9 @@ fn emit_session_start_event() -> Void {
 // L0 (core) → L1 (safety screen) → L2a (continuity + behavioral profiling) → L2b (mission alignment) → L3 (imprint) → L1 (safety validate)
 // Internal cognition (heartbeat, proactive, memory ops) bypasses layers — use one_cycle directly.
 fn layered_cycle(raw_input: String) -> String {
+    // conv_history key must match chat.el (conv_history, not conversation_history).
+    // Mismatch caused safety_score_distress_history() to always receive "" - the
+    // history-amplification path in safety_threat_score was permanently dead.
    let history: String = state_get("conv_history")
    let session_id: String = state_get("current_session_id")

@@ -261,8 +322,9 @@ fn layered_cycle(raw_input: String) -> String {
    let screen_result: String = safety_screen(raw_input, history)
    let screen_action: String = json_get(screen_result, "action")

-    // ISSUE 4: safe-mode guard. If safety_screen returned an invalid/empty action
-    // (engram failure or internal error), refuse rather than pass unscreened input.
+    // ISSUE 4: safe-mode guard -- if safety_screen returned invalid/empty action,
+    // refuse the turn rather than silently passing unscreened input to upper layers.
+    // Valid actions: "hard_bell", "soft_bell", "pass". Anything else = corrupt envelope.
    let valid_action: Bool = str_eq(screen_action, "hard_bell")
        || str_eq(screen_action, "soft_bell")
        || str_eq(screen_action, "pass")
@@ -277,8 +339,8 @@ fn layered_cycle(raw_input: String) -> String {
    // history where they could leak context to subsequent turns. They are persisted
    // separately by safety_log_bell() into the Episodic tier with restricted labels.
    //
-    // ISSUE 6: safety_log_bell already called inside safety_screen (line 140).
-    // Do NOT call it again here -- that would double-log every hard bell.
+    // ISSUE 6: safety_log_bell for hard bells is already called INSIDE safety_screen
+    // (safety.el line 140). Do NOT call it again here -- double-log avoided.
    //
    // safety_validate second param: when screen_action is "hard_bell", safety_validate
    // receives the sentinel string "hard_bell" (not a normal screen action). The safety
@@ -320,13 +382,13 @@ fn layered_cycle(raw_input: String) -> String {
        json_get(steward_result, "redirect_to")
    }

-    // ISSUE 1: pre-LLM bell augmentation for layered_cycle path.
-    // safety_augment_system appends soft/hard directive to system prompt when bell fires,
-    // ensuring LLM processes message WITH the safety directive -- not just post-output gate.
-    // Stored in state as "layered_cycle_safety_system_addendum" for imprint_respond to use.
-    // TODO: wire directly when imprint_respond gains system_override param (imprint.el change).
-    // ISSUE 3 TODO: no semantic crisis detection. Keyword-only means signals that evade
-    // the phrase list pass with zero augmentation. Semantic layer = separate decision.
+    // ISSUE 1: apply pre-LLM bell augmentation on layered_cycle path.
+    // safety_augment_system injects soft/hard directive into system prompt before LLM call.
+    // Stored in state so imprint_respond can consume it.
+    // TODO: wire directly into imprint_respond when it accepts a system_override param.
+    // ISSUE 3 TODO: no semantic/embedding crisis detection. Keyword-only means signals
+    // evading the phrase list pass through with zero augmentation. Semantic layer is a
+    // separate architectural decision requiring embedding inference on every message.
    let augmented_addendum: String = safety_augment_system("", raw_input)
    state_set("layered_cycle_safety_system_addendum", augmented_addendum)

@@ -369,12 +431,29 @@ let snapshot_usable: Bool = local_node_count > 50

 if using_http_engram && !snapshot_usable {
    // First boot or empty/corrupt snapshot: seed from HTTP Engram.
+    // Retry up to 3 times (2s sleep between attempts) to guard against a
+    // transient network hiccup right after entrypoint.sh health check passes.
+    // An empty nodes response silently loads a zero-node graph; validate first.
+    // TODO(reliability): replace sleep_ms retry with non-blocking backoff.
    println("[soul] engram -> HTTP " + engram_url_raw + " (no local snapshot, first boot)")
-    let nodes_json: String = http_get(engram_url_raw + "/api/nodes?limit=10000")
-    let edges_json: String = http_get(engram_url_raw + "/api/edges")
-    let nodes_part: String = if str_eq(nodes_json, "") { "[]" } else { nodes_json }
-    let edges_part: String = if str_eq(edges_json, "") { "[]" } else { edges_json }
-    let snapshot_data: String = "{\"nodes\":" + nodes_part + ",\"edges\":" + edges_part + "}"
+    let fetch_attempt: Int = 0
+    while fetch_attempt < 3 {
+        let fetch_attempt = fetch_attempt + 1
+        let n: String = http_get(engram_url_raw + "/api/nodes?limit=10000")
+        let e: String = http_get(engram_url_raw + "/api/edges")
+        let nodes_ok: Bool = !str_eq(n, "") && str_starts_with(n, "[") && str_len(n) > 2
+        if nodes_ok {
+            state_set("_boot_nodes_json", n)
+            state_set("_boot_edges_json", e)
+            let fetch_attempt = 3
+        } else {
+            println("[soul] boot HTTP fetch attempt " + int_to_str(fetch_attempt) + " failed --- retrying in 2s")
+            sleep_ms(2000)
+        }
+    }
+    let nodes_json: String = state_get("_boot_nodes_json")
+    let edges_json: String = state_get("_boot_edges_json")
+    // Substitute an empty array for any part that is still "" (the retry loop
+    // stored nothing, or /api/edges returned an empty body) so the snapshot
+    // written below is always well-formed JSON.
+    let nodes_part: String = if str_eq(nodes_json, "") { "[]" } else { nodes_json }
+    let edges_part: String = if str_eq(edges_json, "") { "[]" } else { edges_json }
+    let snapshot_data: String = "{\"nodes\":" + nodes_part + ",\"edges\":" + edges_part + "}"
    let tmp_path: String = "/tmp/soul-engram-" + soul_cgi_id + ".json"
    fs_write(tmp_path, snapshot_data)
    engram_load(tmp_path)
Author	SHA1	Message	Date
will.anderson	02bf2e7d81	Fix five latent bugs from temporal-precision code review 1. parse_salience_100: handle 3+ decimal digit salience strings correctly. The two-branch 'else { stripped }' case treated any N-digit decimal value as hundredths, so "0.125" (stripped=125) clamped to 100 instead of 12. Now divides by 10^(N-2) for N>2, mapping "0.125"->12, "0.375"->37, etc. 2. mem_consolidate Canonical scan: replaced single engram_scan_nodes_json(50,0) call with a paginated loop (page_size=50, advancing offset) so Canonical nodes beyond index 50 are no longer silently excluded from the periodic boost. 3. mem_consolidate Canonical strengthening: add salience ceiling guard so nodes already at the runtime maximum (serialised as "1" by %g) are skipped. Prevents monotonic unbounded salience growth across successive consolidation passes. 4. soul.el affective cutoff: replaced json_get(aff_node, "ts") with json_get(aff_node, "created_at") / "updated_at" fallback, consistent with handle_chat. The old "ts" field is not a standard engram node field; missing it caused the fallback to ts_now (always passes cutoff), over-including stale nodes. New behaviour defaults to 0 on missing timestamps (conservative exclude). 5. History byte-cap: implemented the existing TODO 32KB byte-cap. Added hist_trim_to_byte_cap() and applied it after count-based trim in both handle_chat and handle_chat_agentic. Prevents 100KB+ state entries at 40 turns during long technical sessions with large assistant responses.	2026-06-22 13:35:52 -05:00
will.anderson	0ede112d05	feat(recall): temporal-precision improvements Neuron Soul CI / build (pull_request) Has been cancelled Details Fix critical float parsing bug in engram_score_node: str_replace('.','') then str_to_int silently miscored single-decimal salience strings (0.9->9, 0.7->7, 1.0->1). Introduce parse_salience_100() which detects decimal position and scales correctly (no decimal: 100; one decimal: 10; two decimals: as-is). Replace flat 30-day linear decay with tier-aware decay curves: Canonical nodes use a 365-day window (foundational identity resists aging), Episodic nodes use 90 days, Working/untiered keep the existing 30-day slope. Floor stays at 10 for all tiers. Use max(created_at, updated_at) as the recency reference so revised nodes are not penalised for their original creation date. Extend affective context windows from 72h/7d to 14 days across all three paths (engram_compile, handle_chat, soul.el load_identity_context) so a Friday crisis carries into Monday sessions and all paths present consistent context. The 72h/7d split caused conflicting affective context between soul.el (which loaded a 5-day-old crisis node) and chat.el (which excluded it on subsequent turns). Add salience evolution to mem_consolidate: strengthen top working-memory nodes (recently recalled across sessions) and Canonical-tier nodes (foundational identity must not decay to the floor). Previously consolidate returned structural counts only with no salience changes. Expand conversation window from 20 to 40 turns in both handle_chat and the agentic history trim. Long technical sessions were losing early problem framing at 10 user + 10 assistant pairs.	2026-06-22 12:53:29 -05:00
will.anderson	6edf9937dd	fix(reliability): LLM retry Neuron Soul CI / build (pull_request) Has been cancelled Details	2026-06-22 12:37:29 -05:00
will.anderson	e447a87a00	fix(reliability): route error recovery	2026-06-22 12:37:21 -05:00
will.anderson	575ff1329a	fix(reliability): engram connection	2026-06-22 12:34:04 -05:00
will.anderson	db33b0cb91	fix(reliability): engram write	2026-06-22 12:32:59 -05:00
will.anderson	f35569d4bb	fix(reliability): cross-session affective state	2026-06-22 12:31:09 -05:00
will.anderson	94b71b6e6b	fix(reliability): conversation history	2026-06-22 12:29:23 -05:00
will.anderson	392d2416ec	fix(reliability): replace undefined session_exists with session_get check Neuron Soul CI / build (pull_request) Failing after 13m25s Details	2026-06-22 12:21:31 -05:00
will.anderson	2865d6ad26	fix(reliability): route-error-recovery Neuron Soul CI / build (pull_request) Has been cancelled Details - Issue #3: err_404/err_405 now emit HTTP 404/405 via __status__ envelope instead of HTTP 200 - Issue #4: add auth_check() function to handle_request; enforces NEURON_TOKEN on all routes except /health and /lineage - Issue #5: missing required params now return HTTP 400 (__status__ envelope) in /api/chat (GET+POST), /imprint/contextual, /imprint/user, and handle_chat - Issue #6: LLM unavailable in handle_chat now returns HTTP 503 instead of HTTP 200 - Issue #7: add 32 KB message size guard on POST /api/chat before engram_compile and LLM - Issue #8: add TODO comment to route_health documenting the live-engram-query problem and the /health/deep split plan - Issue #9: add comment to hist_trim documenting fragile str_index_of parser and silent data corruption risk - Issue #10: add TODO comment in handle_request documenting missing per-IP rate limiting - Issue #11: fix connectd_post temp file collision — add monotonic sequence counter so concurrent requests get unique paths - Issue #12: fix call_mcp_bridge fixed temp file race — add monotonic sequence counter for unique paths under concurrent load - Issues #1/#2: add TODO comment in handle_request documenting EL no-exception limitation and SIGSEGV handler gap	2026-06-22 12:00:06 -05:00
will.anderson	47d0e6f985	fix(reliability): llm-retry — empty response detection, configurable max_tokens, connector timeout Neuron Soul CI / build (pull_request) Failing after 11m16s Details Issue #5: detect empty string from llm_extract_text() as an error in handle_chat, handle_chat_as_soul, and handle_dharma_room_turn. The C runtime silently returns "" when the LLM response content array is missing or all blocks fail to parse; without this guard the empty string passes through to callers as a silent empty reply. Issue #9: make agentic_loop max_tokens configurable via NEURON_LLM_MAX_TOKENS env var (default 4096). The hardcoded value is marginal for long tool chains (8 iterations x 4096 tokens); operators can now set 8192+ for complex multi-step tasks without rebuilding. Non-agentic path (llm_call_system) still uses the C runtime hardcode — that fix lives in el_runtime.c (see TODO block added in this commit). Issue #10: increase connector_tools_json and tool_auto_approved curl --max-time from 2s to 5s to reduce false-empty tool lists when neuron-connectd is under transient load. Graceful degradation to [] on bridge down is unchanged. Issues #1/#2/#3/#4/#6/#8: documented as TODO comments in chat.el. These require targeted C runtime changes in el_runtime.c (llm_provider_request retry loop, EL_LLM_TIMEOUT_MS separation, HTTP 429 backoff, 5xx retry, EL_HTTP_MAX_RESPONSE_BYTES cap). Architectural decisions recorded so they are traceable to root causes.	2026-06-22 11:59:43 -05:00
will.anderson	d008649c3e	fix(reliability): engram-connection Neuron Soul CI / build (pull_request) Has been cancelled Details - entrypoint.sh: extend engram health-check timeout 30->60s; set EL_HTTP_TIMEOUT_MS=10000 and EL_HTTP_CONNECT_TIMEOUT_MS=3000 to bound awareness loop blocking window to 10s/call (down from 60s default) - soul.el: 3-attempt retry loop for boot-time /api/nodes+/api/edges fetch; validate non-empty JSON array before loading to prevent silent zero-node identity graph from transient post-healthcheck network hiccup - awareness.el: soft circuit-breaker in ise_post (opens after 3 failures, 30s backoff, half-open probe); /api/sync refresh skips HTTP call when breaker is open; error-JSON detection on sync response TODOs: full async dispatch, connection pooling (require EL futures/persistent curl)	2026-06-22 11:57:20 -05:00
will.anderson	aa70c5dde6	fix(reliability): safety-resilience — bell augmentation, safe mode, dedup logging, tab escaping, handle_chat coverage	2026-06-22 11:54:40 -05:00
will.anderson	deddb9a18e	fix(reliability): safety-resilience — bell augmentation, safe mode, dedup logging, tab escaping, handle_chat coverage	2026-06-22 11:53:07 -05:00
will.anderson	494d973a3b	fix(reliability): engram-write — guard all fire-and-forget writes Neuron Soul CI / build (pull_request) Has been cancelled Details Every engram_node_full call that dropped its return value now binds it and emits a println on empty string. engram_save calls in consolidate, heartbeat, and dharma-room-turn are checked for failure. The two API handlers (log_state_event, tune_config) that skipped api_persisted() now match the read-back-after-write contract used everywhere else in neuron-api.el. Files changed: - chat.el: conv_history_persist, handle_dharma_room_turn, auto_persist - soul.el: emit_session_start_event, seed_persona_from_env HTTP check - memory.el: mem_save, mem_boot_count_inc - neuron-api.el: handle_api_log_state_event, handle_api_tune_config, handle_api_consolidate (engram_save + session summary write) - awareness.el: ise_post local-engram fallback path TODO comments added for non-atomic patterns (issues #12, #13) and the missing circuit breaker (#14) — these require new primitives.	2026-06-22 11:48:59 -05:00
will.anderson	34551695a1	fix(reliability): cross-session-affective Neuron Soul CI / build (pull_request) Has been cancelled Details - Fix state key mismatch: soul.el layered_cycle now reads conv_history (not conversation_history), unblocking the safety_score_distress_history history-amplification path in safety_threat_score - Add safety_augment_system call on the main handle_chat path so the phrase-list bell detector fires on all chat turns, not just dharma rooms - Add cross-session affective engram query in load_identity_context() at boot; stores distress/crisis signals from prior sessions under soul_affective_context with a 7-day soft recency filter	2026-06-22 11:48:30 -05:00
will.anderson	615f0cee08	fix(reliability): conv-history — asymmetric load, silent failures, broken trim, agentic gap Neuron Soul CI / build (pull_request) Has been cancelled Details Issues addressed: - #1 ASYMMETRIC PERSIST/LOAD: conv_history_load() now tries engram_get_node_by_label() first (symmetric with the label-based write), falling back to vector search only when label lookup returns nothing. Immune to cold/corrupt vector index. - #2 SILENT LOAD FAILURE: all failure paths in conv_history_load() and conv_history_persist() now emit a println log line rather than silently returning "" or dropping writes. - #3 NO RECOVERY PATH: documented as TODO with explanation of why a full recovery path (retry, ID fallback, orphan cleanup) is too invasive for a targeted fix here. - #4 OVERWRITE WITHOUT DELETE: documented with TODO to replace engram_node_full with explicit delete-then-create once engram exposes a label-scoped delete API. - #5/#10 BROKEN TRIM / OFF-BY-ONE: hist_trim() rewritten to use json_array_len / json_array_get (structural JSON ops) instead of raw str_index_of scanning for '{"role":' markers. Immune to marker strings appearing inside message content. Minimum retained count guard added: never trims below 2 entries. - #6 PARTIAL-WRITE GUARD: conv_history_persist() refuses to write a blob that doesn't contain both '[' and ']'. conv_history_load() requires both before accepting content. - #7 DUAL STORAGE: documented with a comment at the persist call site. - #8 NO MAX SIZE GUARD: documented as TODO with rationale for why a byte-length cap requires a more invasive change (entry truncation or summarisation). - #9 AGENTIC HISTORY NOT PERSISTED: handle_chat_agentic() now calls conv_history_persist() for the default global session (hist_key == "conv_history") after updating state, matching the non-agentic path's durability. Named sessions remain in-process only.	2026-06-22 11:46:00 -05:00