fix/engram-float-parser

2026-06-22 14:28:17 -05:00
parent c93be6a315 33cb1138f4
commit 94b55d667c
1 changed files with 100 additions and 22 deletions
@@ -35,6 +35,41 @@ fn engram_numeric_valid(s: String) -> Bool {
    return true
 }

+// parse_float_x100 — parse a decimal string like "0.85", "0.9", "1.0" into an integer
+// scaled by 100 (so "0.85" -> 85, "0.9" -> 90, "1.0" -> 100). Uses only integer
+// arithmetic because el has no float math. The fraction is normalised to exactly 2
+// digits (padded with "0" or truncated, never rounded) so "0.9" is not misread as 9.
+// A leading "-" negates the whole value ("-0.5" -> -50, "-1.25" -> -125), so callers
+// that clamp to 0-100 see a negative number rather than a positive fraction.
+// Returns 70 (a safe mid-range default) only for the empty string. Other malformed
+// input is handed to str_to_int unchanged, so callers should validate before calling.
+fn parse_float_x100(s: String) -> Int {
+    if str_eq(s, "") { return 70 }
+    if !str_contains(s, ".") {
+        // Integer input: treat as a whole number * 100 (e.g. "1" -> 100)
+        let whole: Int = str_to_int(s)
+        return whole * 100
+    }
+    // Split at the dot: left is the whole part, right_raw holds the fraction digits.
+    let dot_pos: Int = str_index_of(s, ".")
+    let left: String = str_slice(s, 0, dot_pos)
+    let right_raw: String = str_slice(s, dot_pos + 1, str_len(s))
+    // Normalise the fraction to exactly 2 digits: "" -> "00", "9" -> "90", "857" -> "85".
+    let right: String = str_slice(right_raw + "00", 0, 2)
+    // Track the sign separately: a left part of "-0" presumably parses to 0 and loses it.
+    let negative: Bool = str_index_of(s, "-") == 0
+    // A bare "" or "-" before the dot (".5", "-.5") means a whole part of zero.
+    let left_val: Int = if str_eq(left, "") { 0 } else {
+        if str_eq(left, "-") { 0 } else { str_to_int(left) }
+    }
+    let right_val: Int = str_to_int(right)
+    // For negative input the fraction must move the result further below zero.
+    if negative {
+        return left_val * 100 - right_val
+    }
+    return left_val * 100 + right_val
+}
+
 // engram_score_node — compute a recency x relevance score for a single engram
 // node JSON object. Higher is better. Score = salience * importance * recency_factor.
 // recency_factor decays linearly over 30 days: nodes updated today score 1.0,
@@ -50,13 +85,13 @@ fn engram_score_node(node_json: String) -> Int {
    let tier_str: String = json_get(node_json, "tier")

    // Q1 fix: validate before str_to_int. Non-numeric values fall back to safe defaults.
-    // Parse as floats via * 100 integer arithmetic (el has no float math).
+    // parse_float_x100 handles 1- and 2-decimal floats correctly ("0.9" -> 90, "0.85" -> 85).
    let salience_100: Int = if !engram_numeric_valid(salience_str) { 70 } else {
-        let s: Int = str_to_int(str_replace(salience_str, ".", ""))
+        let s: Int = parse_float_x100(salience_str)
        if s > 100 { 100 } else { if s < 0 { 0 } else { s } }
    }
    let importance_100: Int = if !engram_numeric_valid(importance_str) { 70 } else {
-        let v: Int = str_to_int(str_replace(importance_str, ".", ""))
+        let v: Int = parse_float_x100(importance_str)
        if v > 100 { 100 } else { if v < 0 { 0 } else { v } }
    }

@@ -97,7 +132,7 @@ fn engram_render_node(node_json: String) -> String {
    }
    let salience_str: String = json_get(node_json, "salience")
    let sal_100: Int = if str_eq(salience_str, "") { 0 } else {
-        let s: Int = str_to_int(str_replace(salience_str, ".", ""))
+        let s: Int = parse_float_x100(salience_str)
        if s > 100 { 100 } else { if s < 0 { 0 } else { s } }
    }
    let salience_hint: String = if str_eq(salience_str, "") { "" } else {
@@ -177,8 +212,8 @@ fn engram_compile_ranked(nodes_json: String, max_nodes: Int) -> String {
        while ci < total {
            let node: String = json_array_get(nodes_json, ci)
            let score: Int = engram_score_node(node)
-            // Threshold: includes moderately-relevant older nodes (score >= 15).
-            let above_thresh: Bool = score >= 15
+            // Threshold 25: sal=0.5 * imp=0.5 * recency=1.0 -> 50*50*100/10000 = 25.
+            let above_thresh: Bool = score >= 25
            let idx_marker: String = "|" + int_to_str(ci) + "|"
            let already_picked: Bool = str_contains(selected_indices, idx_marker)
            let is_better: Bool = score > best_score && above_thresh && !already_picked
@@ -223,14 +258,14 @@ fn engram_score_node(node_json: String) -> Int {
    let importance_str: String = json_get(node_json, "importance")
    let created_str: String = json_get(node_json, "created_at")

-    // Parse as floats via * 100 integer arithmetic (el has no float math)
+    // parse_float_x100 handles 1- and 2-decimal floats correctly ("0.9" -> 90, "0.85" -> 85).
+    // Default 70 when field is absent; clamp to 0-100 range.
    let salience_100: Int = if str_eq(salience_str, "") { 70 } else {
-        let s: Int = str_to_int(str_replace(salience_str, ".", ""))
-        // Clamp to 0-100 range (value was e.g. "0.85" -> parsed "085" = 85)
+        let s: Int = parse_float_x100(salience_str)
        if s > 100 { 100 } else { if s < 0 { 0 } else { s } }
    }
    let importance_100: Int = if str_eq(importance_str, "") { 70 } else {
-        let v: Int = str_to_int(str_replace(importance_str, ".", ""))
+        let v: Int = parse_float_x100(importance_str)
        if v > 100 { 100 } else { if v < 0 { 0 } else { v } }
    }

@@ -249,9 +284,10 @@ fn engram_score_node(node_json: String) -> Int {
 }

 // engram_compile_ranked — build a context string from a JSON array of node objects,
-// ordered best-first by score. Only nodes above threshold=15 are included.
-// With corrected parsing: sal=0.5 * imp=0.5 at max recency scores 25; threshold 15
-// gives headroom for moderately-relevant older nodes while filtering near-zero noise.
+// ordered best-first by score. Only nodes scoring at or above threshold=25 are included.
+// With corrected float parsing: sal=0.5 * imp=0.5 at max recency (100) scores exactly 25,
+// so threshold=25 admits all nodes with at least moderate salience and importance while
+// cutting near-zero noise. Lower values were masking the bug; 25 is correct post-fix.
 // Returns at most max_nodes entries. max_nodes must not exceed 20 (sentinel limit).
 fn engram_compile_ranked(nodes_json: String, max_nodes: Int) -> String {
    if str_eq(nodes_json, "") { return "" }
@@ -268,9 +304,8 @@ fn engram_compile_ranked(nodes_json: String, max_nodes: Int) -> String {
        while ci < total {
            let node: String = json_array_get(nodes_json, ci)
            let score: Int = engram_score_node(node)
-            // Threshold lowered from 25 to 15: includes moderately-relevant older nodes.
-            // A 3-week-old node with salience 0.6 and importance 0.6 scores ~18 — was dropped, now included.
-            let above_thresh: Bool = score >= 15
+            // Threshold 25: sal=0.5 * imp=0.5 * recency=1.0 -> 50*50*100/10000 = 25.
+            let above_thresh: Bool = score >= 25
            // Check this index wasn't already selected (sentinel: look for idx marker)
            let idx_marker: String = "\"_sel_" + int_to_str(ci) + "\""
            let already_picked: Bool = str_contains(selected, idx_marker)
@@ -1124,12 +1159,29 @@ fn handle_chat(body: String) -> String {
    state_set("conv_history", final_hist)
    conv_history_persist(final_hist)

-    // Automatic session-end summary: write/overwrite the SessionSummary node on each turn
-    // so process restarts always have a continuity snapshot (no shutdown hook needed).
-    // Uses autogenerate (no LLM) so it is cheap — the node is overwritten not appended.
-    let auto_sum: String = session_summary_autogenerate(final_hist)
-    if !str_eq(auto_sum, "") {
-        let discard_sum: String = session_summary_write(auto_sum)
+    // Session-end summary hook: write a dated SessionSummary node once per boot when
+    // the conversation reaches >= 5 user turns (10 hist entries = 5 user+assistant pairs).
+    // Uses a per-boot label ("session:summary:<boot_ts>") so summaries accumulate across
+    // sessions instead of overwriting a single global node. A state flag prevents rewriting
+    // on every subsequent turn once the threshold is crossed.
+    let final_hist_len: Int = json_array_len(final_hist)
+    if final_hist_len >= 10 {
+        let already_wrote: String = state_get("session_summary_written")
+        if str_eq(already_wrote, "") {
+            // Derive (or create) a stable boot-scoped session id.
+            let boot_id: String = state_get("session_boot_id")
+            let boot_id = if str_eq(boot_id, "") {
+                let new_id: String = int_to_str(time_now())
+                state_set("session_boot_id", new_id)
+                new_id
+            } else { boot_id }
+            let sess_label: String = "session:summary:" + boot_id
+            let auto_sum: String = session_summary_autogenerate(final_hist)
+            if !str_eq(auto_sum, "") {
+                let discard_sum: String = session_summary_write_dated(auto_sum, sess_label)
+                state_set("session_summary_written", "1")
+            }
+        }
    }

    let activation_nodes: String = engram_activate_json(message, 2)
@@ -2212,6 +2264,32 @@ fn session_summary_write(summary_text: String) -> String {
    return node_id
 }

+// session_summary_write_dated — persist one SessionSummary engram node under the label
+// supplied by the caller. No existing node is deleted here; the caller is expected to
+// pass a label that is unique per session (e.g. "session:summary:<boot_ts>"). Double
+// quotes in the text become single quotes and the text is capped at 800 chars before the
+// "[session-summary] " prefix and " | ts:<now>" suffix are added. Returns the new node
+// id, or "" when either argument is empty or the engram write fails.
+fn session_summary_write_dated(summary_text: String, label: String) -> String {
+    // Guard clauses: nothing is written without both a label and a summary.
+    if str_eq(label, "") { return "" }
+    if str_eq(summary_text, "") { return "" }
+    let unquoted: String = str_replace(summary_text, "\"", "'")
+    let capped: String = if str_len(unquoted) < 801 { unquoted } else { str_slice(unquoted, 0, 800) }
+    let payload: String = "[session-summary] " + capped + " | ts:" + int_to_str(time_now())
+    let written_id: String = engram_node_full(
+        payload, "SessionSummary", label,
+        el_from_float(0.9), el_from_float(0.8), el_from_float(1.0),
+        "Episodic", "[\"SessionSummary\",\"session-summary\",\"previous-session\",\"consolidate\"]"
+    )
+    if !str_eq(written_id, "") {
+        println("[chat] session_summary_write_dated: wrote SessionSummary (" + int_to_str(str_len(payload)) + " chars) label=" + label + " -> " + written_id)
+        return written_id
+    }
+    println("[chat] session_summary_write_dated: engram write failed — summary node lost (label=" + label + ")")
+    return ""
+}
+
 // session_summary_autogenerate — build a minimal summary from conversation history without LLM.
 // Extracts user message snippets (first 80 chars each, up to 5 turns).
 // Used by the session-end summary hook in handle_chat, which writes one dated snapshot per boot (once history reaches 10 entries) rather than one per turn.