From 0ede112d05872d9c35582c40d53686994ca48a59 Mon Sep 17 00:00:00 2001
From: Will Anderson
Date: Mon, 22 Jun 2026 12:53:29 -0500
Subject: [PATCH] feat(recall): temporal-precision improvements

Fix critical float parsing bug in engram_score_node: str_replace('.','')
then str_to_int silently miscored single-decimal salience strings
(0.9->9, 0.7->7, 1.0->1). Introduce parse_salience_100() which detects
decimal position and scales correctly (no decimal: *100; one decimal:
*10; two decimals: as-is).

Replace flat 30-day linear decay with tier-aware decay curves: Canonical
nodes use a 365-day window (foundational identity resists aging),
Episodic nodes use 90 days, Working/untiered keep the existing 30-day
slope. Floor stays at 10 for all tiers. Use max(created_at, updated_at)
as the recency reference so revised nodes are not penalised for their
original creation date.

Extend affective context windows from 72h/7d to 14 days across all three
paths (engram_compile, handle_chat, soul.el load_identity_context) so a
Friday crisis carries into Monday sessions and all paths present
consistent context. The 72h/7d split caused conflicting affective
context between soul.el (which loaded a 5-day-old crisis node) and
chat.el (which excluded it on subsequent turns).

Add salience evolution to mem_consolidate: strengthen top working-memory
nodes (recently recalled across sessions) and Canonical-tier nodes
(foundational identity must not decay to the floor). Previously
consolidate returned structural counts only with no salience changes.

Expand conversation window from 20 to 40 turns in both handle_chat and
the agentic history trim. Long technical sessions were losing early
problem framing at 20 turns (10 user + 10 assistant messages).
---
 chat.el   | 116 +++++++++++++++++++++++++++++++++++++++---------------
 memory.el |  57 +++++++++++++++++++++++++++--
 soul.el   |  13 +++++--
 3 files changed, 150 insertions(+), 36 deletions(-)

diff --git a/chat.el b/chat.el
index f1cf363..98be512 100644
--- a/chat.el
+++ b/chat.el
@@ -12,39 +12,90 @@ fn chat_default_model() -> String {
     return "claude-sonnet-4-5"
 }
 
+// parse_salience_100 — convert a salience/importance float string (as serialized by
+// %g format) to an integer in the range 0..100.
+//
+// The runtime serializes floats with %g which drops trailing zeros:
+//   1.0  -> "1"    (no decimal at all)
+//   0.9  -> "0.9"  (one decimal digit)
+//   0.85 -> "0.85" (two decimal digits)
+//
+// The old approach of str_replace(s, ".", "") then str_to_int was broken:
+//   "0.9"  -> "09"  -> str_to_int -> 9  (should be 90)
+//   "0.5"  -> "05"  -> str_to_int -> 5  (should be 50)
+//   "1"    -> "1"   -> str_to_int -> 1  (should be 100)
+//   "0.85" -> "085" -> str_to_int -> 85 (accidentally correct)
+//
+// Fix: detect presence and position of the decimal point, then scale accordingly.
+//   - No decimal (e.g. "1"): multiply by 100.
+//   - One decimal digit (e.g. "0.9"): multiply by 10 to get 90.
+//   - Two decimal digits (e.g. "0.85"): use as-is (already hundredths).
+//   - Three+ decimal digits (e.g. "0.333333"): truncate to two digits, giving 33.
+//   - Exponent form (e.g. "5e-05", which %g emits below 1e-4): treated as 0.
+// An empty string yields the default 70; every other result is clamped to 0..100.
+fn parse_salience_100(s: String) -> Int {
+    if str_eq(s, "") { return 70 }
+    // A negative exponent means the value is below 0.0001, i.e. 0 on a 0..100 scale.
+    if str_index_of(s, "e-") >= 0 { return 0 }
+    let dot_pos: Int = str_index_of(s, ".")
+    let raw: Int = if dot_pos < 0 {
+        let v: Int = str_to_int(s)
+        v * 100
+    } else {
+        let after_dot: String = str_slice(s, dot_pos + 1, str_len(s))
+        let decimal_digits: Int = str_len(after_dot)
+        // Keep at most two fractional digits so "0.333333" parses as 33, not 333333.
+        let kept: String = if decimal_digits > 2 { str_slice(s, 0, dot_pos + 3) } else { s }
+        let stripped: Int = str_to_int(str_replace(kept, ".", ""))
+        if decimal_digits == 1 { stripped * 10 } else { stripped }
+    }
+    if raw > 100 { 100 } else { if raw < 0 { 0 } else { raw } }
+}
+
 // engram_score_node — compute a recency x relevance score for a single engram
 // node JSON object. Higher is better. Score = salience * importance * recency_factor.
-// recency_factor decays linearly over 30 days: nodes updated today score 1.0,
-// nodes 30+ days old score 0.1 (floor). Nodes with no created_at score 0.5.
-// This keeps fresh, high-salience nodes at the top and pushes stale low-signal
-// nodes to the bottom so they get trimmed when we cap context size.
+//
+// Recency uses a tier-aware decay curve instead of a flat linear slope:
+//   - Canonical tiers decay very slowly: 365-day window (foundational identity).
+//   - Episodic tiers decay at a moderate rate: 90-day window (conversation context).
+//   - Working/untiered nodes decay at 30 days (transient task state).
+//   - Floor is 10 (never zero) for all tiers.
+//
+// Uses max(created_at, updated_at) so recently-revised nodes are not penalised.
+// Nodes with no created_at get a neutral recency of 50; updated_at is not consulted then.
 fn engram_score_node(node_json: String) -> Int {
     let salience_str: String = json_get(node_json, "salience")
     let importance_str: String = json_get(node_json, "importance")
     let created_str: String = json_get(node_json, "created_at")
+    let updated_str: String = json_get(node_json, "updated_at")
+    let tier_str: String = json_get(node_json, "tier")
 
-    // Parse as floats via * 100 integer arithmetic (el has no float math)
-    let salience_100: Int = if str_eq(salience_str, "") { 70 } else {
-        let s: Int = str_to_int(str_replace(salience_str, ".", ""))
-        // Clamp to 0-100 range (value was e.g. "0.85" -> parsed "085" = 85)
-        if s > 100 { 100 } else { if s < 0 { 0 } else { s } }
-    }
-    let importance_100: Int = if str_eq(importance_str, "") { 70 } else {
-        let v: Int = str_to_int(str_replace(importance_str, ".", ""))
-        if v > 100 { 100 } else { if v < 0 { 0 } else { v } }
-    }
+    let salience_100: Int = parse_salience_100(salience_str)
+    let importance_100: Int = parse_salience_100(importance_str)
 
-    // Recency: decay from 100 (today) to 10 (30+ days). created_at is Unix seconds.
     let now_ts: Int = time_now()
     let recency_100: Int = if str_eq(created_str, "") { 50 } else {
         let created_ts: Int = str_to_int(created_str)
-        let age_secs: Int = now_ts - created_ts
-        let age_days: Int = age_secs / 86400
-        let decay: Int = if age_days >= 30 { 10 } else { 100 - (age_days * 3) }
+        let updated_ts: Int = if str_eq(updated_str, "") { 0 } else { str_to_int(updated_str) }
+        let ref_ts: Int = if updated_ts > created_ts { updated_ts } else { created_ts }
+        let age_secs: Int = now_ts - ref_ts
+        let age_days: Int = if age_secs < 0 { 0 } else { age_secs / 86400 }
+        let is_canonical: Bool = str_eq(tier_str, "Canonical")
+        let is_episodic: Bool = str_eq(tier_str, "Episodic")
+        let decay: Int = if is_canonical {
+            let drop: Int = if age_days >= 365 { 90 } else { age_days * 90 / 365 }
+            100 - drop
+        } else {
+            if is_episodic {
+                if age_days >= 90 { 10 } else { 100 - age_days }
+            } else {
+                if age_days >= 30 { 10 } else { 100 - (age_days * 3) }
+            }
+        }
         if decay < 10 { 10 } else { decay }
     }
 
-    // Combined score 0-1000000 (no floats): salience * importance * recency / 10000
+    // Combined score 0..100 using integer math only (each factor is 0..100).
     return salience_100 * importance_100 * recency_100 / 10000
 }
 
@@ -151,16 +193,17 @@ fn engram_compile(intent: String) -> String {
     }
 
     // Affective context: always include the most recent high-emotion memory if one
-    // exists within 72 hours. This ensures continuity of care across turns — when
-    // the user was in distress earlier in the session (or recently), that context
-    // travels into every subsequent LLM call so the response register stays aware.
+    // exists within 14 days. This ensures continuity of care across sessions — a
+    // crisis on Friday must still carry into Monday (72h was too narrow for multi-day
+    // distress arcs such as grief or recurring suicidal ideation). 14-day window
+    // (1,209,600 seconds) covers sustained emotional arcs while excluding ancient
+    // history. Unified with handle_chat and soul.el affective checks.
// We search for BellEvent nodes specifically; these are written by auto_persist - // when safety_detect_bell_level fires. The 72h window (259200 seconds) is wide - // enough to span a multi-session day without pulling ancient history. + // when safety_detect_bell_level fires. let bell_nodes: String = engram_search_json("bell:soft bell:hard BellEvent", 3) let bell_ok: Bool = !str_eq(bell_nodes, "") && !str_eq(bell_nodes, "[]") let now_ts: Int = time_now() - let cutoff_ts: Int = now_ts - 259200 + let cutoff_ts: Int = now_ts - 1209600 let recent_bell: String = if bell_ok { let bn0: String = json_array_get(bell_nodes, 0) // created_at is not present in engram node JSON for BellEvent nodes. @@ -482,12 +525,14 @@ fn handle_chat(body: String) -> String { } // Cross-session affective context: on session start (no history yet), check engram - // for recent distress signals within 72h and prepend a care directive if found. + // for recent distress signals within 14 days and prepend a care directive if found. + // Extended from 72h: multi-day crisis must persist across Monday sessions starting + // 3+ days after a Friday event. Consistent with engram_compile and soul.el checks. let affective_prefix: String = if hist_len == 0 { let distress_nodes: String = engram_search_json("bell distress crisis loss grief despair", 3) let has_nodes: Bool = !str_eq(distress_nodes, "") && !str_eq(distress_nodes, "[]") let now_ts: Int = time_now() - let cutoff: Int = now_ts - 259200 + let cutoff: Int = now_ts - 1209600 let found_recent: Bool = if has_nodes { let dn0: String = json_array_get(distress_nodes, 0) let ts0_raw: String = json_get(dn0, "created_at") @@ -611,7 +656,10 @@ fn handle_chat(body: String) -> String { // history blob can reach ~80KB before trim fires. engram_node_full has no apparent size cap. // A byte-length cap would require truncating or summarising entries — too invasive here. // TODO: add a byte-length cap (e.g. 32KB) that drops oldest entries until under limit. 
-    let final_hist: String = if json_array_len(updated_hist2) > 20 {
+    // Increased from 20 to 40 turns: long technical sessions lose early context at 20
+    // (10 user + 10 assistant pairs). 40 turns preserves problem framing for multi-step
+    // tasks while the bell guard still persists evicted distress turns to engram.
+    let final_hist: String = if json_array_len(updated_hist2) > 40 {
         hist_trim_with_bell_guard(updated_hist2)
     } else {
         updated_hist2
@@ -1193,7 +1241,8 @@ fn handle_chat_agentic(body: String) -> String {
     let discard_hist: Bool = if !str_eq(reply_text, "") {
         let updated: String = hist_append(agentic_hist, "user", message)
         let updated2: String = hist_append(updated, "assistant", reply_text)
-        let trimmed: String = if json_array_len(updated2) > 20 { hist_trim(updated2) } else { updated2 }
+        // Increased from 20 to 40 turns: consistent with handle_chat window expansion.
+        let trimmed: String = if json_array_len(updated2) > 40 { hist_trim(updated2) } else { updated2 }
         state_set(hist_key, trimmed)
         // Only persist the default global session to engram — named sessions are ephemeral.
         if str_eq(hist_key, "conv_history") {
diff --git a/memory.el b/memory.el
index eae12c6..684b4c4 100644
--- a/memory.el
+++ b/memory.el
@@ -35,14 +35,65 @@ fn mem_forget(node_id: String) -> Void {
     engram_forget(node_id)
 }
 
+// mem_consolidate — structural scan plus salience-evolution pass.
+//
+// Before this change the function only returned structural counts (scanned,
+// total_nodes, total_edges) and never updated salience, making consolidation a no-op.
+// NOTE(review): a Canonical node that is also in the working-memory top list is
+// strengthened and counted twice in one pass — confirm this is intended.
+//
+// New behavior:
+//   (a) Strengthen frequently-activated nodes: nodes in the top working-memory list
+//       (engram_wm_top_json) are strengthened — they have been recalled recently
+//       and deserve higher salience. Raises effective salience for nodes that prove
+//       relevant across multiple sessions.
+//   (b) Strengthen Canonical-tier nodes: identity and foundational nodes should not
+//       decay; each consolidation pass re-strengthens them so they resist the
+//       tier-aware decay curve without requiring active recall.
+//   (c) Returns JSON with scanned, total_nodes, total_edges and strengthened for observability.
+//
+// Called by awareness_run() on the "consolidate" inbox action.
 fn mem_consolidate() -> String {
     let scanned: Int = engram_node_count()
-    let dummy: String = engram_scan_nodes_json(100, 0)
-    let total_nodes: Int = engram_node_count()
     let total_edges: Int = engram_edge_count()
+    let strengthened: Int = 0
+
+    // (a) Strengthen top working-memory nodes — recalled recently across sessions.
+    // Cap at 10 to keep consolidation fast.
+    let wm_top: String = engram_wm_top_json(10)
+    let wm_len: Int = json_array_len(wm_top)
+    let wi: Int = 0
+    while wi < wm_len {
+        let wm_node: String = json_array_get(wm_top, wi)
+        let wm_id: String = json_get(wm_node, "id")
+        if !str_eq(wm_id, "") {
+            engram_strengthen(wm_id)
+            let strengthened = strengthened + 1
+        }
+        let wi = wi + 1
+    }
+
+    // (b) Strengthen Canonical-tier nodes from a scan so they resist temporal decay.
+    // NOTE(review): only the nodes returned by engram_scan_nodes_json(50, 0) are checked — confirm coverage.
+    let scan_result: String = engram_scan_nodes_json(50, 0)
+    let scan_len: Int = json_array_len(scan_result)
+    let si: Int = 0
+    while si < scan_len {
+        let s_node: String = json_array_get(scan_result, si)
+        let s_tier: String = json_get(s_node, "tier")
+        let s_id: String = json_get(s_node, "id")
+        if str_eq(s_tier, "Canonical") && !str_eq(s_id, "") {
+            engram_strengthen(s_id)
+            let strengthened = strengthened + 1
+        }
+        let si = si + 1
+    }
+
+    let total_nodes: Int = engram_node_count()
     return "{\"scanned\":" + int_to_str(scanned)
         + ",\"total_nodes\":" + int_to_str(total_nodes)
-        + ",\"total_edges\":" + int_to_str(total_edges) + "}"
+        + ",\"total_edges\":" + int_to_str(total_edges)
+        + ",\"strengthened\":" + int_to_str(strengthened) + "}"
 }
 
 fn mem_save(path: String) -> Void {
diff --git a/soul.el b/soul.el
index 4942376..56b71a4 100644
--- a/soul.el
+++ b/soul.el
@@ -166,15 +166,20 @@ fn load_identity_context() -> Void {
     // Cross-session affective context: query engram for recent distress/crisis signals
     // at session start. Stored under soul_affective_context so the safety layer can
     // detect when a user has been in distress across previous sessions.
-    // Soft recency guard: nodes with a ts field older than 7 days are skipped.
-    // Results capped at 3 nodes, 200 chars each, to avoid over-injection into context.
+    // Recency guard: nodes older than 14 days (1,209,600 seconds) are skipped.
+    // Unified at 14 days with chat.el engram_compile and handle_chat affective checks
+    // so all three paths present consistent affective context. The previous 7-day
+    // (604800s) window was inconsistent with the 72h chat.el window, causing
+    // conflicting context: soul.el loaded a 5-day-old crisis node while chat.el
+    // did not include it on subsequent turns. Both now use 14 days.
+    // Results capped at 3 nodes, 200 chars each, to limit context inflation.
     // TODO(recency): engram_search_json sorts by relevance, not timestamp.
A native // after= filter in the engram search API would make this more precise. - let affective_raw: String = engram_search_json("distress crisis upset hopeless", 3) + let affective_raw: String = engram_search_json("distress crisis upset hopeless bell BellEvent", 3) let affective_ok: Bool = !str_eq(affective_raw, "") && !str_eq(affective_raw, "[]") if affective_ok { let ts_now: Int = time_now() - let ts_cutoff: Int = ts_now - 604800 + let ts_cutoff: Int = ts_now - 1209600 let aff_total: Int = json_array_len(affective_raw) let aff_ctx: String = "" let ai: Int = 0