diff --git a/chat.el b/chat.el
index 681c1a5..dcc5b42 100644
--- a/chat.el
+++ b/chat.el
@@ -35,6 +35,41 @@ fn engram_numeric_valid(s: String) -> Bool {
     return true
 }
 
+// parse_float_x100 — parse a decimal string like "0.85", "0.9", "1.0" into an integer
+// scaled by 100 ("0.85" -> 85, "0.9" -> 90, "1.0" -> 100, "1" -> 100). Integer arithmetic
+// only, because el has no float math. The fraction is normalised to exactly two digits
+// (padded or truncated, never rounded) so "0.9" is not misread as 9.
+// A leading '-' is applied to the whole value, so "-0.5" -> -50 rather than +50.
+// Returns 70 (a safe mid-range default) for "", "-", "." and strings with more than one dot.
+// Other non-digit content is not checked here and is left to str_to_int.
+fn parse_float_x100(s: String) -> Int {
+    if str_eq(s, "") { return 70 }
+    // Peel the sign off first: an integer part of "-0" is just 0 and would lose the sign.
+    let negative: Bool = str_eq(str_slice(s, 0, 1), "-")
+    let digits: String = if negative { str_slice(s, 1, str_len(s)) } else { s }
+    if str_eq(digits, "") { return 70 }
+    if str_eq(digits, ".") { return 70 }
+    if !str_contains(digits, ".") {
+        // Integer input: treat as a whole number * 100 (e.g. "1" -> 100).
+        let whole_x100: Int = str_to_int(digits) * 100
+        if negative { return 0 - whole_x100 }
+        return whole_x100
+    }
+    // Split at the first dot into the integer part and the raw fraction.
+    let dot_pos: Int = str_index_of(digits, ".")
+    let left: String = str_slice(digits, 0, dot_pos)
+    let right_raw: String = str_slice(digits, dot_pos + 1, str_len(digits))
+    // A second dot ("1.2.3") is structurally invalid.
+    if str_contains(right_raw, ".") { return 70 }
+    // Normalise the fraction to exactly 2 digits: "" -> "00", "9" -> "90", "855" -> "85".
+    let padded: String = right_raw + "00"
+    let right: String = str_slice(padded, 0, 2)
+    let left_val: Int = if str_eq(left, "") { 0 } else { str_to_int(left) }
+    let magnitude: Int = left_val * 100 + str_to_int(right)
+    if negative { return 0 - magnitude }
+    return magnitude
+}
+
 // engram_score_node — compute a recency x relevance score for a single engram
 // node JSON object. Higher is better. Score = salience * importance * recency_factor.
 // recency_factor decays linearly over 30 days: nodes updated today score 1.0,
@@ -50,13 +85,13 @@ fn engram_score_node(node_json: String) -> Int {
     let tier_str: String = json_get(node_json, "tier")
 
     // Q1 fix: validate before str_to_int. Non-numeric values fall back to safe defaults.
- // Parse as floats via * 100 integer arithmetic (el has no float math). + // parse_float_x100 handles 1- and 2-decimal floats correctly ("0.9" -> 90, "0.85" -> 85). let salience_100: Int = if !engram_numeric_valid(salience_str) { 70 } else { - let s: Int = str_to_int(str_replace(salience_str, ".", "")) + let s: Int = parse_float_x100(salience_str) if s > 100 { 100 } else { if s < 0 { 0 } else { s } } } let importance_100: Int = if !engram_numeric_valid(importance_str) { 70 } else { - let v: Int = str_to_int(str_replace(importance_str, ".", "")) + let v: Int = parse_float_x100(importance_str) if v > 100 { 100 } else { if v < 0 { 0 } else { v } } } @@ -97,7 +132,7 @@ fn engram_render_node(node_json: String) -> String { } let salience_str: String = json_get(node_json, "salience") let sal_100: Int = if str_eq(salience_str, "") { 0 } else { - let s: Int = str_to_int(str_replace(salience_str, ".", "")) + let s: Int = parse_float_x100(salience_str) if s > 100 { 100 } else { if s < 0 { 0 } else { s } } } let salience_hint: String = if str_eq(salience_str, "") { "" } else { @@ -177,8 +212,8 @@ fn engram_compile_ranked(nodes_json: String, max_nodes: Int) -> String { while ci < total { let node: String = json_array_get(nodes_json, ci) let score: Int = engram_score_node(node) - // Threshold: includes moderately-relevant older nodes (score >= 15). - let above_thresh: Bool = score >= 15 + // Threshold 25: sal=0.5 * imp=0.5 * recency=1.0 -> 50*50*100/10000 = 25. 
+ let above_thresh: Bool = score >= 25 let idx_marker: String = "|" + int_to_str(ci) + "|" let already_picked: Bool = str_contains(selected_indices, idx_marker) let is_better: Bool = score > best_score && above_thresh && !already_picked @@ -223,14 +258,14 @@ fn engram_score_node(node_json: String) -> Int { let importance_str: String = json_get(node_json, "importance") let created_str: String = json_get(node_json, "created_at") - // Parse as floats via * 100 integer arithmetic (el has no float math) + // parse_float_x100 handles 1- and 2-decimal floats correctly ("0.9" -> 90, "0.85" -> 85). + // Default 70 when field is absent; clamp to 0-100 range. let salience_100: Int = if str_eq(salience_str, "") { 70 } else { - let s: Int = str_to_int(str_replace(salience_str, ".", "")) - // Clamp to 0-100 range (value was e.g. "0.85" -> parsed "085" = 85) + let s: Int = parse_float_x100(salience_str) if s > 100 { 100 } else { if s < 0 { 0 } else { s } } } let importance_100: Int = if str_eq(importance_str, "") { 70 } else { - let v: Int = str_to_int(str_replace(importance_str, ".", "")) + let v: Int = parse_float_x100(importance_str) if v > 100 { 100 } else { if v < 0 { 0 } else { v } } } @@ -249,9 +284,10 @@ fn engram_score_node(node_json: String) -> Int { } // engram_compile_ranked — build a context string from a JSON array of node objects, -// ordered best-first by score. Only nodes above threshold=15 are included. -// With corrected parsing: sal=0.5 * imp=0.5 at max recency scores 25; threshold 15 -// gives headroom for moderately-relevant older nodes while filtering near-zero noise. +// ordered best-first by score. Only nodes above threshold=25 are included. +// With corrected float parsing: sal=0.5 * imp=0.5 at max recency (100) scores exactly 25, +// so threshold=25 admits all nodes with at least moderate salience and importance while +// cutting near-zero noise. Lower values were masking the bug; 25 is correct post-fix. // Returns at most max_nodes entries. 
max_nodes must not exceed 20 (sentinel limit). fn engram_compile_ranked(nodes_json: String, max_nodes: Int) -> String { if str_eq(nodes_json, "") { return "" } @@ -268,9 +304,8 @@ fn engram_compile_ranked(nodes_json: String, max_nodes: Int) -> String { while ci < total { let node: String = json_array_get(nodes_json, ci) let score: Int = engram_score_node(node) - // Threshold lowered from 25 to 15: includes moderately-relevant older nodes. - // A 3-week-old node with salience 0.6 and importance 0.6 scores ~18 — was dropped, now included. - let above_thresh: Bool = score >= 15 + // Threshold 25: sal=0.5 * imp=0.5 * recency=1.0 -> 50*50*100/10000 = 25. + let above_thresh: Bool = score >= 25 // Check this index wasn't already selected (sentinel: look for idx marker) let idx_marker: String = "\"_sel_" + int_to_str(ci) + "\"" let already_picked: Bool = str_contains(selected, idx_marker) @@ -1124,12 +1159,29 @@ fn handle_chat(body: String) -> String { state_set("conv_history", final_hist) conv_history_persist(final_hist) - // Automatic session-end summary: write/overwrite the SessionSummary node on each turn - // so process restarts always have a continuity snapshot (no shutdown hook needed). - // Uses autogenerate (no LLM) so it is cheap — the node is overwritten not appended. - let auto_sum: String = session_summary_autogenerate(final_hist) - if !str_eq(auto_sum, "") { - let discard_sum: String = session_summary_write(auto_sum) + // Session-end summary hook: write a dated SessionSummary node once per boot when + // the conversation reaches >= 5 user turns (10 hist entries = 5 user+assistant pairs). + // Uses a per-boot label ("session:summary:") so summaries accumulate across + // sessions instead of overwriting a single global node. A state flag prevents rewriting + // on every subsequent turn once the threshold is crossed. 
+ let final_hist_len: Int = json_array_len(final_hist) + if final_hist_len >= 10 { + let already_wrote: String = state_get("session_summary_written") + if str_eq(already_wrote, "") { + // Derive (or create) a stable boot-scoped session id. + let boot_id: String = state_get("session_boot_id") + let boot_id = if str_eq(boot_id, "") { + let new_id: String = int_to_str(time_now()) + state_set("session_boot_id", new_id) + new_id + } else { boot_id } + let sess_label: String = "session:summary:" + boot_id + let auto_sum: String = session_summary_autogenerate(final_hist) + if !str_eq(auto_sum, "") { + let discard_sum: String = session_summary_write_dated(auto_sum, sess_label) + state_set("session_summary_written", "1") + } + } } let activation_nodes: String = engram_activate_json(message, 2) @@ -2212,6 +2264,32 @@ fn session_summary_write(summary_text: String) -> String { return node_id } +// session_summary_write_dated — write a SessionSummary node with a caller-supplied dated label. +// Unlike session_summary_write, this does NOT delete old nodes — each session accumulates its +// own node so engram_search_json("session:summary") can return multiple past sessions. +// The label must be unique per session (e.g. "session:summary:"). 
+fn session_summary_write_dated(summary_text: String, label: String) -> String {
+    // Both the text and the label are required; bail out before building anything.
+    if str_eq(summary_text, "") { return "" }
+    if str_eq(label, "") { return "" }
+    // Double quotes become single quotes, then the text is capped at 800 (per str_len).
+    let unquoted: String = str_replace(summary_text, "\"", "'")
+    let capped: String = if str_len(unquoted) > 800 { str_slice(unquoted, 0, 800) } else { unquoted }
+    let body: String = "[session-summary] " + capped + " | ts:" + int_to_str(time_now())
+    let tag_list: String = "[\"SessionSummary\",\"session-summary\",\"previous-session\",\"consolidate\"]"
+    let written_id: String = engram_node_full(
+        body, "SessionSummary", label,
+        el_from_float(0.9), el_from_float(0.8), el_from_float(1.0),
+        "Episodic", tag_list
+    )
+    if !str_eq(written_id, "") {
+        println("[chat] session_summary_write_dated: wrote SessionSummary (" + int_to_str(str_len(body)) + " chars) label=" + label + " -> " + written_id)
+        return written_id
+    }
+    println("[chat] session_summary_write_dated: engram write failed — summary node lost (label=" + label + ")")
+    return ""
+}
+
 // session_summary_autogenerate — build a minimal summary from conversation history without LLM.
 // Extracts user message snippets (first 80 chars each, up to 5 turns).
 // Used as the automatic session-end hook so every turn produces a continuity snapshot.