diff --git a/chat.el b/chat.el index 98be512..63553fc 100644 --- a/chat.el +++ b/chat.el @@ -16,20 +16,28 @@ fn chat_default_model() -> String { // %g format) to an integer in the range 0..100. // // The runtime serializes floats with %g which drops trailing zeros: -// 1.0 -> "1" (no decimal at all) -// 0.9 -> "0.9" (one decimal digit) -// 0.85 -> "0.85" (two decimal digits) +// 1.0 -> "1" (no decimal at all) +// 0.9 -> "0.9" (one decimal digit) +// 0.85 -> "0.85" (two decimal digits) +// 0.125 -> "0.125" (three decimal digits — %g does not round to 2 dp) // // The old approach of str_replace(s, ".", "") then str_to_int was broken: -// "0.9" -> "09" -> str_to_int -> 9 (should be 90) -// "0.5" -> "05" -> str_to_int -> 5 (should be 50) -// "1" -> "1" -> str_to_int -> 1 (should be 100) -// "0.85" -> "085" -> str_to_int -> 85 (accidentally correct) +// "0.9" -> "09" -> str_to_int -> 9 (should be 90) +// "0.5" -> "05" -> str_to_int -> 5 (should be 50) +// "1" -> "1" -> str_to_int -> 1 (should be 100) +// "0.85" -> "085" -> str_to_int -> 85 (accidentally correct) +// "0.125" -> "0125" -> str_to_int -> 125 -> clamped to 100 (wrong: should be 12) // // Fix: detect presence and position of the decimal point, then scale accordingly. // - No decimal (e.g. "1"): multiply by 100. // - One decimal digit (e.g. "0.9"): multiply by 10 to get 90. -// - Two+ decimal digits (e.g. "0.85"): use as-is (already hundredths). +// - Two decimal digits (e.g. "0.85"): use as-is (already hundredths). +// - Three+ decimal digits: stripped integer is in units of 10^N (where N=digits +// after the dot), so divide by 10^(N-2) to reduce to hundredths. 
Examples: +// "0.125" -> stripped=125, N=3 -> 125/10 = 12 +// "0.375" -> stripped=375, N=3 -> 375/10 = 37 +// "0.625" -> stripped=625, N=3 -> 625/10 = 62 +// "0.875" -> stripped=875, N=3 -> 875/10 = 87 fn parse_salience_100(s: String) -> Int { if str_eq(s, "") { return 70 } let dot_pos: Int = str_index_of(s, ".") @@ -40,7 +48,25 @@ fn parse_salience_100(s: String) -> Int { let after_dot: String = str_slice(s, dot_pos + 1, str_len(s)) let decimal_digits: Int = str_len(after_dot) let stripped: Int = str_to_int(str_replace(s, ".", "")) - if decimal_digits == 1 { stripped * 10 } else { stripped } + if decimal_digits == 1 { + stripped * 10 + } else { + if decimal_digits == 2 { + stripped + } else { + // 3+ decimal digits: divide out the extra precision to get hundredths. + // extra = decimal_digits - 2; divisor = 10^extra. + let extra: Int = decimal_digits - 2 + let divisor: Int = if extra == 1 { 10 } else { + if extra == 2 { 100 } else { + if extra == 3 { 1000 } else { + if extra == 4 { 10000 } else { 100000 } + } + } + } + stripped / divisor + } + } } if raw > 100 { 100 } else { if raw < 0 { 0 } else { raw } } } @@ -397,6 +423,47 @@ fn hist_trim_with_bell_guard(hist: String) -> String { return hist } +// hist_trim_to_byte_cap — drop oldest user+assistant pairs until the history blob +// is at or below `cap_bytes` in length, or until only 2 entries remain (the minimum +// safe window). Uses the same structural json_array_len/json_array_get approach as +// hist_trim to stay immune to content containing JSON marker strings. +// +// Called after count-based trimming to enforce a hard size ceiling on the history +// blob. Without this cap, long technical sessions with large assistant responses +// (code blocks, logs, analysis) can push the 40-turn window to 100KB+, which causes +// engram_node_full writes to grow state entries unboundedly. 
+fn hist_trim_to_byte_cap(hist: String, cap_bytes: Int) -> String {
+    let current: String = hist
+    let current_len: Int = str_len(current)
+    while current_len > cap_bytes {
+        let total: Int = json_array_len(current)
+        // Dropping a pair must leave at least 2 entries, so stop when fewer than 4 remain.
+        if total < 4 {
+            let current_len = 0 // exit loop
+        } else {
+            // Rebuild the array from index 2 onward, i.e. drop entries 0 and 1 (oldest pair).
+            let result: String = ""
+            let i: Int = 2
+            while i < total {
+                let entry: String = json_array_get(current, i)
+                let result = if str_eq(result, "") {
+                    entry
+                } else {
+                    result + "," + entry
+                }
+                let i = i + 1
+            }
+            if str_eq(result, "") {
+                let current_len = 0 // nothing usable was read back: keep `current` and exit loop
+            } else {
+                let current = "[" + result + "]"
+                let current_len = str_len(current)
+            }
+        }
+    }
+    return current
+}
+
 // clean_llm_response — strips GPT-2 BPE byte-to-unicode artifacts that vLLM
 // emits when the tokenizer hasn't decoded back to raw bytes.
 //
@@ -651,19 +718,23 @@ fn handle_chat(body: String) -> String {
     let updated_hist2: String = hist_append(updated_hist, "assistant", raw_response)
     // Use bell-guarded trim: if the evicted turn triggered a bell event, it is
     // preserved to engram before being dropped from the in-memory window.
-    // Issue #8 (NO MAX SIZE GUARD): the 20-turn count limit bounds entry count, but individual
-    // messages can be arbitrarily large (up to max_tokens = 4096 tokens each). At 20 turns the
-    // history blob can reach ~80KB before trim fires. engram_node_full has no apparent size cap.
-    // A byte-length cap would require truncating or summarising entries — too invasive here.
-    // TODO: add a byte-length cap (e.g. 32KB) that drops oldest entries until under limit.
     // Increased from 20 to 40 turns: long technical sessions lose early context at 20
     // (10 user + 10 assistant pairs). 40 turns preserves problem framing for multi-step
     // tasks while the bell guard still persists evicted distress turns to engram.
- let final_hist: String = if json_array_len(updated_hist2) > 40 { + // Byte-cap: after count-based trim, also trim oldest pairs until the history blob + // is under 32KB. Long technical sessions with large assistant responses (code blocks, + // analysis) can produce 100-160KB+ state entries at 40 turns; the count limit alone + // is insufficient. We retain at least 2 entries (1 user + 1 assistant pair) regardless. + let count_trimmed: String = if json_array_len(updated_hist2) > 40 { hist_trim_with_bell_guard(updated_hist2) } else { updated_hist2 } + let final_hist: String = if str_len(count_trimmed) > 32768 { + hist_trim_to_byte_cap(count_trimmed, 32768) + } else { + count_trimmed + } state_set("conv_history", final_hist) conv_history_persist(final_hist) @@ -1242,7 +1313,13 @@ fn handle_chat_agentic(body: String) -> String { let updated: String = hist_append(agentic_hist, "user", message) let updated2: String = hist_append(updated, "assistant", reply_text) // Increased from 20 to 40 turns: consistent with handle_chat window expansion. - let trimmed: String = if json_array_len(updated2) > 40 { hist_trim(updated2) } else { updated2 } + // Byte-cap: also trim if the blob exceeds 32KB, consistent with handle_chat. + let count_trimmed2: String = if json_array_len(updated2) > 40 { hist_trim(updated2) } else { updated2 } + let trimmed: String = if str_len(count_trimmed2) > 32768 { + hist_trim_to_byte_cap(count_trimmed2, 32768) + } else { + count_trimmed2 + } state_set(hist_key, trimmed) // Only persist the default global session to engram — named sessions are ephemeral. if str_eq(hist_key, "conv_history") { diff --git a/memory.el b/memory.el index 684b4c4..11fd220 100644 --- a/memory.el +++ b/memory.el @@ -73,20 +73,49 @@ fn mem_consolidate() -> String { let wi = wi + 1 } - // (b) Strengthen Canonical-tier nodes from a scan so they resist temporal decay. - // Canonical nodes encode foundational identity — they must not silently floor at 10. 
- let scan_result: String = engram_scan_nodes_json(50, 0) - let scan_len: Int = json_array_len(scan_result) - let si: Int = 0 - while si < scan_len { - let s_node: String = json_array_get(scan_result, si) - let s_tier: String = json_get(s_node, "tier") - let s_id: String = json_get(s_node, "id") - if str_eq(s_tier, "Canonical") && !str_eq(s_id, "") { - engram_strengthen(s_id) - let strengthened = strengthened + 1 + // (b) Strengthen Canonical-tier nodes from a full paginated scan so they resist + // temporal decay. Canonical nodes encode foundational identity — they must not + // silently floor at 10. Page size 50, scanning until fewer than 50 nodes are + // returned (last page), so all Canonical nodes are reached even in large graphs. + // Without pagination, only the first 50 nodes in the graph were eligible; any + // Canonical node at index 50+ was silently excluded from the boost. + // Strengthening is skipped if the node's current salience is already at the + // runtime ceiling (represented as "1" by %g) to avoid monotonic unbounded growth. + // Canonical nodes with salience < 1.0 are strengthened each consolidation pass; + // once they reach the ceiling the runtime will no longer raise them further, so + // calling engram_strengthen at the ceiling is a no-op in the runtime anyway, but + // the explicit check makes the intent clear and avoids any runtime log noise. + let page_size: Int = 50 + let scan_offset: Int = 0 + let scan_done: Bool = false + while !scan_done { + let scan_result: String = engram_scan_nodes_json(page_size, scan_offset) + let scan_len: Int = json_array_len(scan_result) + if scan_len == 0 { + let scan_done = true + } else { + let si: Int = 0 + while si < scan_len { + let s_node: String = json_array_get(scan_result, si) + let s_tier: String = json_get(s_node, "tier") + let s_id: String = json_get(s_node, "id") + let s_sal: String = json_get(s_node, "salience") + // Only strengthen if below the ceiling to prevent unbounded salience growth. 
+ // engram serialises the ceiling as "1" (%g drops the decimal part when it + // is exactly zero). Any other value is below ceiling and should be boosted. + let at_ceiling: Bool = str_eq(s_sal, "1") + if str_eq(s_tier, "Canonical") && !str_eq(s_id, "") && !at_ceiling { + engram_strengthen(s_id) + let strengthened = strengthened + 1 + } + let si = si + 1 + } + let scan_offset = scan_offset + scan_len + // Fewer results than page_size means we've reached the last page. + if scan_len < page_size { + let scan_done = true + } } - let si = si + 1 } let total_nodes: Int = engram_node_count() diff --git a/soul.el b/soul.el index 56b71a4..be09490 100644 --- a/soul.el +++ b/soul.el @@ -186,8 +186,20 @@ fn load_identity_context() -> Void { while ai < aff_total { let aff_node: String = json_array_get(affective_raw, ai) let aff_content: String = json_get(aff_node, "content") - let aff_ts_str: String = json_get(aff_node, "ts") - let aff_ts: Int = if str_eq(aff_ts_str, "") { ts_now } else { str_to_int(aff_ts_str) } + // Use created_at (the standard engram node timestamp field), consistent + // with handle_chat which reads created_at / updated_at. The previous + // field name "ts" is not a standard engram field: it was present in some + // BellEvent content payloads but absent from standard engram node JSON, + // causing json_get to return "" and the fallback to ts_now — meaning ALL + // nodes with a missing "ts" field appeared recent, over-including stale + // content. With the 14-day window, this amplification was significant. + // Fix: read created_at first, fall back to updated_at, then default to 0 + // (same as handle_chat). A ts of 0 always fails the cutoff check, so nodes + // missing both timestamp fields are conservatively excluded rather than + // blindly included. 
+ let aff_ca: String = json_get(aff_node, "created_at") + let aff_ts_str: String = if str_eq(aff_ca, "") { json_get(aff_node, "updated_at") } else { aff_ca } + let aff_ts: Int = if str_eq(aff_ts_str, "") { 0 } else { str_to_int(aff_ts_str) } let is_recent: Bool = aff_ts >= ts_cutoff let snip: String = if str_len(aff_content) > 200 { str_slice(aff_content, 0, 200) } else { aff_content } let aff_ctx = if is_recent && !str_eq(snip, "") {