fix(emergency): repair session-continuity regressions from prior merge

fix/session-continuity-hook
fix/context-dedup-shared-ids
2026-06-22 14:51:51 -05:00 · 2026-06-22 14:29:31 -05:00 · 2026-06-22 14:29:06 -05:00 · 2026-06-22 14:28:17 -05:00 · 2026-06-22 14:25:29 -05:00 · 2026-06-22 14:19:14 -05:00
1 changed file with 77 additions and 128 deletions
@@ -233,125 +233,7 @@ fn engram_compile_ranked(nodes_json: String, max_nodes: Int) -> String {
    }
    if str_eq(selected_nodes, "") { return "" }
    return "[" + selected_nodes + "]"
-}ory.el"
-
-fn chat_default_model() -> String {
-    let m: String = state_get("soul_model")
-    if !str_eq(m, "") {
-        return m
-    }
-    let e: String = env("SOUL_LLM_MODEL")
-    if !str_eq(e, "") {
-        return e
-    }
-    return "claude-sonnet-4-5"
 }
-
-// engram_score_node — compute a recency x relevance score for a single engram
-// node JSON object. Higher is better. Score = salience * importance * recency_factor.
-// recency_factor decays linearly over 30 days: nodes updated today score 1.0,
-// nodes 30+ days old score 0.1 (floor). Nodes with no created_at score 0.5.
-// This keeps fresh, high-salience nodes at the top and pushes stale low-signal
-// nodes to the bottom so they get trimmed when we cap context size.
-fn engram_score_node(node_json: String) -> Int {
-    let salience_str: String = json_get(node_json, "salience")
-    let importance_str: String = json_get(node_json, "importance")
-    let created_str: String = json_get(node_json, "created_at")
-
-    // parse_float_x100 handles 1- and 2-decimal floats correctly ("0.9" -> 90, "0.85" -> 85).
-    // Default 70 when field is absent; clamp to 0-100 range.
-    let salience_100: Int = if str_eq(salience_str, "") { 70 } else {
-        let s: Int = parse_float_x100(salience_str)
-        if s > 100 { 100 } else { if s < 0 { 0 } else { s } }
-    }
-    let importance_100: Int = if str_eq(importance_str, "") { 70 } else {
-        let v: Int = parse_float_x100(importance_str)
-        if v > 100 { 100 } else { if v < 0 { 0 } else { v } }
-    }
-
-    // Recency: decay from 100 (today) to 10 (30+ days). created_at is Unix seconds.
-    let now_ts: Int = time_now()
-    let recency_100: Int = if str_eq(created_str, "") { 50 } else {
-        let created_ts: Int = str_to_int(created_str)
-        let age_secs: Int = now_ts - created_ts
-        let age_days: Int = age_secs / 86400
-        let decay: Int = if age_days >= 30 { 10 } else { 100 - (age_days * 3) }
-        if decay < 10 { 10 } else { decay }
-    }
-
-    // Combined score 0-1000000 (no floats): salience * importance * recency / 10000
-    return salience_100 * importance_100 * recency_100 / 10000
-}
-
-// engram_compile_ranked — build a context string from a JSON array of node objects,
-// ordered best-first by score. Only nodes above threshold=25 are included.
-// With corrected float parsing: sal=0.5 * imp=0.5 at max recency (100) scores exactly 25,
-// so threshold=25 admits all nodes with at least moderate salience and importance while
-// cutting near-zero noise. Lower values were masking the bug; 25 is correct post-fix.
-// Returns at most max_nodes entries. max_nodes must not exceed 20 (sentinel limit).
-fn engram_compile_ranked(nodes_json: String, max_nodes: Int) -> String {
-    if str_eq(nodes_json, "") { return "" }
-    if str_eq(nodes_json, "[]") { return "" }
-    let total: Int = json_array_len(nodes_json)
-    if total == 0 { return "" }
-    let selected_indices: String = ""
-    let selected_nodes: String = ""
-    let pass: Int = 0
-    while pass < max_nodes && pass < total {
-        let best_idx: Int = -1
-        let best_score: Int = -1
-        let ci: Int = 0
-        while ci < total {
-            let node: String = json_array_get(nodes_json, ci)
-            let score: Int = engram_score_node(node)
-            // Threshold 25: sal=0.5 * imp=0.5 * recency=1.0 -> 50*50*100/10000 = 25.
-            let above_thresh: Bool = score >= 25
-            // Check this index wasn't already selected (sentinel: look for idx marker)
-            let idx_marker: String = "\"_sel_" + int_to_str(ci) + "\""
-            let already_picked: Bool = str_contains(selected, idx_marker)
-            let is_better: Bool = score > best_score && above_thresh && !already_picked
-            let best_score = if is_better { score } else { best_score }
-            let best_idx = if is_better { ci } else { best_idx }
-            let ci = ci + 1
-        }
-        if best_idx < 0 {
-            let pass = total  // break
-        } else {
-            let chosen: String = json_array_get(nodes_json, best_idx)
-            let sep: String = if str_eq(selected_nodes, "") { "" } else { "," }
-            let selected_nodes = selected_nodes + sep + chosen
-            let selected_indices = selected_indices + "|" + int_to_str(best_idx) + "|"
-        }
-        let pass = pass + 1
-    }
-    if str_eq(selected_nodes, "") { return "" }
-    return "[" + selected_nodes + "]"
-}
-
-    if str_eq(selected, "") { return "" }
-    // Strip the _sel_N sentinel fields that were used for duplicate-detection bookkeeping.
-    // The sentinels have the form "\"_sel_N\":1," (trailing comma, space before next key).
-    // We injected them as the first field in each object, so the pattern is predictable.
-    // Because el has no regex, remove up to 20 possible sentinel variants by literal replace.
-    let clean: String = "[" + selected + "]"
-    let c0: String = str_replace(clean, "\"_sel_0\":1,", "")
-    let c1: String = str_replace(c0, "\"_sel_1\":1,", "")
-    let c2: String = str_replace(c1, "\"_sel_2\":1,", "")
-    let c3: String = str_replace(c2, "\"_sel_3\":1,", "")
-    let c4: String = str_replace(c3, "\"_sel_4\":1,", "")
-    let c5: String = str_replace(c4, "\"_sel_5\":1,", "")
-    let c6: String = str_replace(c5, "\"_sel_6\":1,", "")
-    let c7: String = str_replace(c6, "\"_sel_7\":1,", "")
-    let c8: String = str_replace(c7, "\"_sel_8\":1,", "")
-    let c9: String = str_replace(c8, "\"_sel_9\":1,", "")
-    let c10: String = str_replace(c9, "\"_sel_10\":1,", "")
-    let c11: String = str_replace(c10, "\"_sel_11\":1,", "")
-    let c12: String = str_replace(c11, "\"_sel_12\":1,", "")
-    let c13: String = str_replace(c12, "\"_sel_13\":1,", "")
-    let c14: String = str_replace(c13, "\"_sel_14\":1,", "")
-    return c14
-}
-
 // engram_split_topics — split message into sub-queries on explicit conjunctions.
 // "health goals AND startup progress" becomes two independent searches.
 fn engram_split_topics(message: String) -> String {
@@ -495,6 +377,38 @@ fn engram_nodes_merge(a: String, b: String) -> String {
    return engram_dedup_nodes("[" + ai + "," + bi + "]")
 }

+// id_in_seen — true when seen contains "|" + node_id + "|"; an empty node_id or an empty seen set never matches.
+fn id_in_seen(node_id: String, seen: String) -> Bool {
+    if str_eq(node_id, "") { return false }  // an empty ID is never reported as seen
+    if str_eq(seen, "") { return false }  // nothing has been recorded yet
+    return str_contains(seen, "|" + node_id + "|")  // ID is wrapped in both delimiters; assuming str_contains is a plain substring test, a fragment of a longer ID cannot match
+}
+
+// add_to_seen — return seen with "|" + node_id + "|" appended; seen comes back unchanged when node_id is empty or already present.
+fn add_to_seen(seen: String, node_id: String) -> String {  // NOTE: argument order is (seen, node_id), the reverse of id_in_seen
+    if str_eq(node_id, "") { return seen }  // empty IDs are never recorded
+    if id_in_seen(node_id, seen) { return seen }  // already recorded; do not append a duplicate entry
+    return seen + "|" + node_id + "|"  // every entry carries its own leading and trailing "|", e.g. "|a||b|"
+}
+
+// engram_extract_ids — collect the "id" field of each element of a JSON array into a pipe-delimited
+// string in the format built by add_to_seen and read by id_in_seen. Returns "" for "", "[]" or a zero-length array.
+fn engram_extract_ids(nodes_json: String) -> String {
+    if str_eq(nodes_json, "") { return "" }
+    if str_eq(nodes_json, "[]") { return "" }
+    let total: Int = json_array_len(nodes_json)
+    if total == 0 { return "" }  // NOTE(review): presumably also reached when the input is not a JSON array — confirm json_array_len's behaviour
+    let seen: String = ""
+    let i: Int = 0
+    while i < total {
+        let node: String = json_array_get(nodes_json, i)
+        let node_id: String = json_get(node, "id")
+        let seen = add_to_seen(seen, node_id)  // add_to_seen ignores empty and already-present IDs
+        let i = i + 1
+    }
+    return seen
+}
+
 // Q4 note: engram_compile has no cache or circuit-breaker at the EL layer.
 // Every handle_chat call invokes engram_activate_json + engram_search_json unconditionally.
 // If the engram backend is repeatedly unreachable (e.g., during startup or after a crash),
@@ -584,6 +498,10 @@ fn engram_compile(intent: String) -> String {
    let merged: String = engram_nodes_merge(merged, recall_boost)
    let merged_nodes: String = merged

+    // Publish compiled IDs to state so session_preload can skip duplicate nodes.
+    let ids_from_merged: String = engram_extract_ids(merged_nodes)
+    state_set("engram_compile_seen_ids", ids_from_merged)
+
    // Fallback: when all searches return nothing, fetch persona nodes.
    let scan_part: String = if str_eq(merged_nodes, "") || str_eq(merged_nodes, "[]") {
        let persona_fallback: String = engram_search_json("soul:persona Persona identity", 5)
@@ -648,12 +566,8 @@ fn engram_compile(intent: String) -> String {
    let sep_ma: String = if !str_eq(main_part, "") && !str_eq(affective_part, "") { "\n" } else { "" }
    let ctx: String = main_part + sep_ma + affective_part

-    // Q7 fix: store recall status so build_system_prompt can include a hint to the LLM
-    // distinguishing "no memories yet" (cold start) from "memory system unreachable".
-    // Values: "ok" | "empty" | "unavailable"
-    let any_ok: Bool = act_ok || srch_ok || scan_ok || affective_ok
-    let all_failed: Bool = act_failed && srch_failed
-    let recall_status: String = if any_ok { "ok" } else { if all_failed { "unavailable" } else { "empty" } }
+    // Publish recall_status for build_system_prompt: "ok" when ctx has content, "empty" otherwise.
+    let recall_status: String = if str_eq(ctx, "") { "empty" } else { "ok" }
    state_set("engram_recall_status", recall_status)

    if str_eq(ctx, "") {
@@ -715,6 +629,17 @@ fn build_system_prompt(ctx: String, chat_mode: Bool) -> String {
        "\n\n[IDENTITY GRAPH — who you are, loaded from your engram]\n" + id_ctx
    }

+    // soul_affective_context is loaded at boot by load_identity_context() with BellEvent/
+    // PositiveEvent nodes from the last 7 days. Surfaced here so the LLM sees historical
+    // emotional patterns from prior sessions at every turn.
+    // Issue 1 fix: declare affective_boot_block before it is referenced in the return.
+    let boot_aff_ctx: String = state_get("soul_affective_context")
+    let affective_boot_block: String = if str_eq(boot_aff_ctx, "") {
+        ""
+    } else {
+        "\n\n[CROSS-SESSION EMOTIONAL CONTEXT — from prior sessions]\n" + boot_aff_ctx
+    }
+
    // Q7 fix: if recall produced no results, include a hint so the LLM can respond
    // authentically ("I seem to be starting fresh" vs "memory system may be down")
    // rather than silently acting as if it has context it doesn't have.
@@ -909,6 +834,29 @@ fn conv_history_load() -> String {
    return content
 }

+// session_preload_bullets — render the "content" field of the first max_bullets nodes of a JSON array as
+// "- " bullet lines joined by "\n"; content longer than snip_len is cut with str_slice(content, 0, snip_len).
+fn session_preload_bullets(nodes: String, max_bullets: Int, snip_len: Int) -> String {
+    if str_eq(nodes, "") { return "" }
+    if str_eq(nodes, "[]") { return "" }
+    let total: Int = json_array_len(nodes)
+    let limit: Int = if max_bullets < total { max_bullets } else { total }  // the smaller of max_bullets and total
+    let bullets: String = ""
+    let i: Int = 0
+    while i < limit {
+        let node: String = json_array_get(nodes, i)
+        let content: String = json_get(node, "content")
+        let snip: String = if str_len(content) > snip_len { str_slice(content, 0, snip_len) } else { content }
+        let bullets = if str_eq(snip, "") {  // a node with an empty snippet adds no bullet but still uses one of the limit slots
+            bullets
+        } else {
+            if str_eq(bullets, "") { "- " + snip } else { bullets + "\n- " + snip }  // newline goes between bullets only, never before the first
+        }
+        let i = i + 1
+    }
+    return bullets
+}
+
 fn handle_chat(body: String) -> String {
    let message: String = json_get(body, "message")
    if str_eq(message, "") {
@@ -994,9 +942,10 @@ fn handle_chat(body: String) -> String {
        }
    }

-    // Issue 4 fix: engram_compile_multi adds entity + emotion fan-out seeds
-    let ctx: String = engram_compile_multi(activation_seed, message)
-    let system: String = affective_prefix + build_system_prompt(ctx)
+    let ctx: String = engram_compile(activation_seed)
+    let system: String = affective_prefix + build_system_prompt(ctx, true)
+
+    let seen_ids: String = state_get("engram_compile_seen_ids")

    // Issue 9 fix: add project-specific and session-summary searches to session preload.
    // Old hardcoded "user profile" and "in_progress active project" miss project-specific
@@ -1643,7 +1592,7 @@ fn handle_chat_agentic(body: String) -> String {
    if str_eq(screen_action, "hard_bell") {
        safety_log_bell("hard", json_get(screen_result, "reason"), str_slice(message, 0, 80))
        return "{\"reply\":\"" + json_safe(safety_validate("", "hard_bell")) + "\",\"model\":\"\",\"agentic\":true,\"tools_used\":[]}"
-
+    }

    let req_model: String = json_get(body, "model")
    let model: String = if str_eq(req_model, "") { chat_default_model() } else { req_model }
Author	SHA1	Message	Date
will.anderson	f2b63f0048	fix(emergency): repair session-continuity regressions from prior merge	2026-06-22 14:51:51 -05:00
will.anderson	774688cfb9	fix/session-continuity-hook Neuron Soul CI / build (push) Has been cancelled Details Deploy Soul to GKE / deploy (push) Failing after 6m0s Details	2026-06-22 14:29:31 -05:00
will.anderson	aa2404b3f7	fix/context-dedup-shared-ids	2026-06-22 14:29:06 -05:00
will.anderson	94b55d667c	fix/engram-float-parser	2026-06-22 14:28:17 -05:00
will.anderson	f73c913498	fix(session-continuity): address all adversarial review findings Issue 1 (CRITICAL): Restore parse_float_x100 for correct single-decimal float handling. "0.9" now correctly yields 90, not 9. Also restores engram_numeric_valid guard that validates inputs before str_to_int. Issue 2 (CRITICAL): Fix handle_chat_agentic safety screen history key regression. state_get("conversation_history") -> state_get("conv_history") so the safety screen receives actual history instead of always "". Issue 3 (REAL BUG): Replace _sel_N JSON sentinel injection in engram_compile_ranked with \|N\| index string tracking. Sentinels were leaking into node JSON delivered to the LLM and cleanup only covered indices 0-14, leaving indices 15+ uncleaned. Issue 4 (REGRESSION): Restore rendered conversation history formatting. Conversation history is now rendered as "User: .../Assistant: ..." with 400-char truncation per turn, not raw JSON array injection. Issue 5 (SCOPE/SAFETY): Restore removed defensive code: engram_numeric_valid and parse_float_x100 guards; conv_history_load label-based fetch + partial- write guard + load-failure state flag; conv_history_persist partial-write guard + failure logging; hist_warning in response envelope. Issue 6 (UNDOCUMENTED): Restore bell event cutoff from 259200s (3 days) back to 1209600s (14 days). Also restore PositiveEvent affective context search that was removed alongside the cutoff change. Issue 7 (LOGIC REGRESSION): Fix affective_prefix to run every turn (not just hist_len == 0). The care/joy directives must persist throughout the session, not vanish after turn 1. Issue 8 (MINOR): session_summary_write_dated now uses el_from_float(0.85) for salience and importance (two-decimal) to avoid any ambiguity in float parsing, and the function is re-added with the session-end hook.	2026-06-22 14:25:29 -05:00
will.anderson	588ca11f57	fix(context-dedup): include scan_part and affective_part IDs in seen set Two design bugs in the state_set placement caused the dedup seen-ID set to be incomplete even with callsites wired up: 1. state_set("engram_compile_seen_ids") was called immediately after merging the main node pools, before scan_part (persona fallback) and affective_part (bell node) were computed. Nodes appearing only in those segments were never added to the seen set. 2. affective_part is a bare JSON object (bn0 from json_array_get), not a JSON array. Passing it to engram_extract_ids would have gotten json_array_len == 0 and silently skipped the affective node's ID. Fix: move state_set to after ctx is assembled from all three segments. Extract ids_from_merged and ids_from_scan via engram_extract_ids (both are JSON arrays), and extract ids_from_affective via json_get(affective_part, "id") directly since it is a bare object. Merge all three via add_to_seen before publishing to state.	2026-06-22 14:19:14 -05:00
will.anderson	9e178d8371	fix(recall): deduplicate engram nodes by ID across activation and search passes Thread a seen-node-ID exclusion set from engram_compile() through to session_preload in handle_chat, preventing the same high-salience nodes (identity, recent memories) from appearing 2-3x in the system prompt. Changes: - Add id_in_seen(), add_to_seen(), engram_extract_ids() helpers that maintain a comma-delimited seen-ID accumulator (EL has no Set type) - In engram_compile(): after merging all topic/entity/recall pools, extract node IDs from merged_nodes and publish via state_set(engram_compile_seen_ids) - In handle_chat(): read seen_ids from state after engram_compile() returns, then check id_in_seen() before emitting each session_preload bullet (profile x3, work x2, project x2, summary x1 — all 8 candidate nodes guarded) Nodes already present in the compiled engram context are skipped in preload, eliminating 3000-3500 token repetition on first-message turns.	2026-06-22 14:06:04 -05:00
will.anderson	aaada3770a	fix(recall): deduplicate engram nodes by ID across activation and search passes engram_compile() already published seen node IDs to state via engram_compile_seen_ids but handle_chat never read or applied them. Wire up the consumption side: - Read engram_compile_seen_ids from state after engram_compile() returns - Check each session_preload candidate node (profile x3, work x2, project x2, summary x3) against id_in_seen() before emitting its content bullet - Nodes already present in the compiled engram context are skipped entirely, preventing the same high-salience identity/memory nodes from appearing 2-3x in the system prompt and burning 3000-3500 tokens on repetition	2026-06-22 14:03:48 -05:00
will.anderson	a0299c0a89	fix(recall): session-end summary hook + session summary recall at start	2026-06-22 14:01:56 -05:00