diff --git a/chat.el b/chat.el index 86038d1..0c562e9 100644 --- a/chat.el +++ b/chat.el @@ -48,72 +48,170 @@ fn engram_score_node(node_json: String) -> Int { return salience_100 * importance_100 * recency_100 / 10000 } -// engram_compile_ranked — build a context string from a JSON array of node objects, -// ordered best-first by score. Only nodes above a minimum score (25 = salience 0.5 * -// importance 0.5 * recency 1.0) are included; the rest are noise. Returns at most -// max_nodes entries concatenated as JSON array text. Because el has no sort primitive, -// we do a single selection pass picking the top N by linear scan (N=10 cap). +// engram_compile_ranked — build a ranked list of nodes, best-first by score. +// Fix (Issue #11): uses "|N|" index tracking instead of _sel_N JSON mutation, +// which leaked sentinel fields into the node objects passed to the LLM. +// Threshold lowered to 15 to include moderately-relevant older nodes. fn engram_compile_ranked(nodes_json: String, max_nodes: Int) -> String { if str_eq(nodes_json, "") { return "" } if str_eq(nodes_json, "[]") { return "" } let total: Int = json_array_len(nodes_json) if total == 0 { return "" } - // Two-pass: first pass finds the top `max_nodes` by score via selection. - // We track selected node indices and their scores to avoid duplicate picks. - let selected: String = "" // comma-sep JSON snippets for chosen nodes - let selected_count: Int = 0 + // selected_indices is a pipe-delimited string of chosen integer indices, e.g. "|2||7|" (each pick appends "|idx|", so adjacent entries share no delimiter). + // No sentinel fields are injected into the node JSON — the nodes stay clean. 
+ let selected_indices: String = "" + let selected_nodes: String = "" let pass: Int = 0 while pass < max_nodes && pass < total { - // Find the unselected node with the highest score let best_idx: Int = -1 let best_score: Int = -1 let ci: Int = 0 while ci < total { let node: String = json_array_get(nodes_json, ci) let score: Int = engram_score_node(node) - // Only include reasonably relevant nodes (threshold=25) - let above_thresh: Bool = score >= 25 - // Check this index wasn't already selected (sentinel: look for idx marker) - let idx_marker: String = "\"_sel_" + int_to_str(ci) + "\"" - let already_picked: Bool = str_contains(selected, idx_marker) + // Threshold lowered from 25 to 15: includes moderately-relevant older nodes. + // A 3-week-old node with salience 0.6 and importance 0.6 scores ~18. + let above_thresh: Bool = score >= 15 + // Check this index wasn't already selected using the index string. + let idx_marker: String = "|" + int_to_str(ci) + "|" + let already_picked: Bool = str_contains(selected_indices, idx_marker) let is_better: Bool = score > best_score && above_thresh && !already_picked let best_score = if is_better { score } else { best_score } let best_idx = if is_better { ci } else { best_idx } let ci = ci + 1 } - // No more qualifying nodes if best_idx < 0 { let pass = total // break } else { let chosen: String = json_array_get(nodes_json, best_idx) - let sep: String = if str_eq(selected, "") { "" } else { "," } - // Append the index sentinel inline so already_picked checks work - let selected = selected + sep + "{\"_sel_" + int_to_str(best_idx) + "\":1," + str_slice(chosen, 1, str_len(chosen) - 1) + "}" - let selected_count = selected_count + 1 + let sep: String = if str_eq(selected_nodes, "") { "" } else { "," } + let selected_nodes = selected_nodes + sep + chosen + let selected_indices = selected_indices + "|" + int_to_str(best_idx) + "|" } let pass = pass + 1 } - if str_eq(selected, "") { return "" } - // Strip the _sel_N sentinel fields 
that were used for duplicate-detection bookkeeping. - // The sentinels have the form "\"_sel_N\":1," (trailing comma, space before next key). - // We injected them as the first field in each object, so the pattern is predictable. - // Because el has no regex, remove up to 10 possible sentinel variants by literal replace. - let clean: String = "[" + selected + "]" - let c0: String = str_replace(clean, "\"_sel_0\":1,", "") - let c1: String = str_replace(c0, "\"_sel_1\":1,", "") - let c2: String = str_replace(c1, "\"_sel_2\":1,", "") - let c3: String = str_replace(c2, "\"_sel_3\":1,", "") - let c4: String = str_replace(c3, "\"_sel_4\":1,", "") - let c5: String = str_replace(c4, "\"_sel_5\":1,", "") - let c6: String = str_replace(c5, "\"_sel_6\":1,", "") - let c7: String = str_replace(c6, "\"_sel_7\":1,", "") - let c8: String = str_replace(c7, "\"_sel_8\":1,", "") - let c9: String = str_replace(c8, "\"_sel_9\":1,", "") - return c9 + if str_eq(selected_nodes, "") { return "" } + return "[" + selected_nodes + "]" +} + +// engram_render_node — render a single engram node JSON object as a human-readable +// bullet line for inclusion in the system prompt. Format: - [TYPE age sal] content +// Fix (Issue #3, #4): passes context as prose bullets instead of raw JSON objects, +// which are opaque to the LLM and waste token budget on field names. 
+fn engram_render_node(node_json: String) -> String {
+    // Input:  one node as a JSON object string. Fields read: "content", "node_type",
+    //         "created_at", "updated_at", "salience".
+    // Output: "- [TYPE age sal] content", or "" when the node is empty or has no content.
+    //         TYPE falls back to "mem"; the age and salience parts are omitted when
+    //         their source field is missing.
+    if str_eq(node_json, "") { return "" }
+    let content: String = json_get(node_json, "content")
+    if str_eq(content, "") { return "" }
+    let node_type: String = json_get(node_json, "node_type")
+    let type_label: String = if str_eq(node_type, "") { "mem" } else { node_type }
+    let now_ts: Int = time_now()
+    let created_str: String = json_get(node_json, "created_at")
+    let updated_str: String = json_get(node_json, "updated_at")
+    // Prefer the creation time; fall back to the last-update time.
+    let ts_raw: String = if str_eq(created_str, "") { updated_str } else { created_str }
+    // NOTE(review): assumes timestamps are integer seconds in the same epoch as
+    // time_now() (the division by 86400 implies seconds) — confirm against the writer.
+    let age_label: String = if str_eq(ts_raw, "") { "" } else {
+        let node_ts: Int = str_to_int(ts_raw)
+        let age_secs: Int = now_ts - node_ts
+        // Timestamps in the future are treated as age 0.
+        let age_days: Int = if age_secs < 0 { 0 } else { age_secs / 86400 }
+        if age_days == 0 { "today" } else {
+            if age_days > 30 { "old" } else { int_to_str(age_days) + "d" }
+        }
+    }
+    let salience_str: String = json_get(node_json, "salience")
+    let sal_100: Int = if str_eq(salience_str, "") { 0 } else {
+        // Scale a decimal string on the 0..1 range to hundredths. Split at the decimal
+        // point and pad/truncate the fraction to exactly two digits. Simply deleting
+        // the "." (the previous approach) maps "0.6" -> 6 and "1.0" -> 10, which
+        // mislabels medium/high-salience nodes as "low".
+        let negative: Bool = str_starts_with(salience_str, "-")
+        let dot: Int = str_index_of(salience_str, ".")
+        let whole_str: String = if dot < 0 { salience_str } else { str_slice(salience_str, 0, dot) }
+        let frac_raw: String = if dot < 0 { "" } else { str_slice(salience_str, dot + 1, str_len(salience_str)) }
+        // frac_raw + "00" always has at least two characters, so the slice is in range.
+        let frac_2: String = str_slice(frac_raw + "00", 0, 2)
+        let whole: Int = if str_eq(whole_str, "") { 0 } else { str_to_int(whole_str) }
+        let s: Int = whole * 100 + str_to_int(frac_2)
+        // Clamp to 0..100; negative inputs clamp to 0 as before.
+        if negative { 0 } else { if s > 100 { 100 } else { if s < 0 { 0 } else { s } } }
+    }
+    let salience_hint: String = if str_eq(salience_str, "") { "" } else {
+        if sal_100 >= 80 { "high" } else { if sal_100 >= 50 { "med" } else { "low" } }
+    }
+    // Build the bracketed annotation, skipping parts that are unavailable.
+    let ann_inner: String = type_label
+    let ann_inner = if str_eq(age_label, "") { ann_inner } else { ann_inner + " " + age_label }
+    let ann_inner = if str_eq(salience_hint, "") { ann_inner } else { ann_inner + " " + salience_hint }
+    let ann: String = "[" + ann_inner + "]"
+    // Cap the content at 200 units of str_slice/str_len to bound prompt size.
+    let snip: String = if str_len(content) > 200 { str_slice(content, 0, 200) } else { content }
+    return "- " + ann + " " + snip
+}
+
+// engram_render_nodes — render a JSON array of engram nodes as newline-joined
+// prose bullet lines. Returns "" when input is empty.
+// Fix (Issue #3): called via engram_render_ctx from build_system_prompt to convert
+// raw JSON ctx to human-readable bullets before injecting into the LLM system prompt.
+fn engram_render_nodes(nodes_json: String) -> String {
+    if str_eq(nodes_json, "") { return "" }
+    if str_eq(nodes_json, "[]") { return "" }
+    let total: Int = json_array_len(nodes_json)
+    if total == 0 { return "" }
+    let result: String = ""
+    let i: Int = 0
+    while i < total {
+        let node: String = json_array_get(nodes_json, i)
+        let line: String = engram_render_node(node)
+        // Nodes that render to "" (e.g. no content) are skipped, not emitted as blank lines.
+        let result = if str_eq(line, "") { result } else {
+            if str_eq(result, "") { line } else { result + "\n" + line }
+        }
+        let i = i + 1
+    }
+    return result
+}
+
+// engram_render_ctx — render a compiled ctx string as prose bullets.
+// ctx is a sequence of segments joined by "\n"; each segment is either a JSON array
+// "[...]" (rendered with engram_render_nodes) or a single JSON object "{...}"
+// (rendered with engram_render_node). engram_compile joins up to three parts this way
+// and engram_compile_multi appends further segments (including the bell node), so ctx
+// can hold more than two segments — every segment is walked, not just the first two.
+// Segments that render to "" or have an unrecognized shape are skipped.
+// If ctx as a whole starts with neither "[" nor "{", it is returned unchanged.
+// NOTE(review): assumes segments contain no raw newline characters (i.e. the JSON is
+// not pretty-printed) — confirm against the producers.
+// Fix (Issue #3): called by build_system_prompt so the LLM receives human-readable
+// prose bullets instead of raw JSON field blobs.
+fn engram_render_ctx(ctx: String) -> String {
+    if str_eq(ctx, "") { return "" }
+    // Fallback: ctx is in an unexpected format; return as-is.
+    if !str_starts_with(ctx, "[") && !str_starts_with(ctx, "{") { return ctx }
+    let rendered: String = ""
+    let remaining: String = ctx
+    let more: Bool = true
+    while more {
+        // Peel off the next "\n"-delimited segment (or the whole remainder).
+        let nl: Int = str_index_of(remaining, "\n")
+        let segment: String = if nl < 0 { remaining } else { str_slice(remaining, 0, nl) }
+        let lines: String = if str_starts_with(segment, "[") {
+            engram_render_nodes(segment)
+        } else {
+            if str_starts_with(segment, "{") { engram_render_node(segment) } else { "" }
+        }
+        let rendered = if str_eq(lines, "") { rendered } else {
+            if str_eq(rendered, "") { lines } else { rendered + "\n" + lines }
+        }
+        let remaining = if nl < 0 { "" } else { str_slice(remaining, nl + 1, str_len(remaining)) }
+        // Stop once the segment just handled was the last one.
+        let more = nl >= 0
+    }
+    return rendered
 }
 
 // is_followup_phrase — returns true when the message is a recognized follow-up
@@ -392,6 +490,15 @@ fn engram_compile_multi(primary_seed: String, message: String) -> String {
     let sep3: String = if !str_eq(merged, "") && !str_eq(ctx3, "") { "\n" } else { "" }
     let merged = if !str_eq(ctx3, "") { merged + sep3 + ctx3 } else { merged }
 
+    // Issue 6 fix: append the bell node exactly once here, after all compile calls.
+    // engram_compile no longer includes affective_part in its return value; instead it
+    // caches the bell node in state. By appending it here we guarantee the bell node
+    // JSON appears at most once in the system prompt's engram block regardless of how
+    // many engram_compile calls were made above.
+ let bell_node: String = state_get("engram_compile_bell_node") + let sep4: String = if !str_eq(merged, "") && !str_eq(bell_node, "") { "\n" } else { "" } + let merged = if !str_eq(bell_node, "") { merged + sep4 + bell_node } else { merged } + if str_eq(merged, "") { return "" } // Issue 3 fix: safe JSON boundary-scan truncation — find the last closing brace @@ -476,12 +583,14 @@ fn engram_compile(intent: String) -> String { let bn_ts: Int = if str_eq(bn_ts_raw, "") { 0 } else { str_to_int(bn_ts_raw) } if bn_ts > cutoff_ts { bn0 } else { "" } } else { "" } - let affective_part: String = if !str_eq(recent_bell, "") { recent_bell } else { "" } - + // Issue 6 fix: do NOT include the bell node in this function's return value. + // engram_compile is called multiple times by engram_compile_multi (once per seed). + // If affective_part were appended here, the bell node JSON would appear once per + // compile call — duplicating it in the merged context. Instead, cache the bell node + // here and let engram_compile_multi append it exactly once after all calls complete. let sep1: String = if !str_eq(act_part, "") && !str_eq(srch_part, "") { "\n" } else { "" } let sep2: String = if (!str_eq(act_part, "") || !str_eq(srch_part, "")) && !str_eq(scan_part, "") { "\n" } else { "" } - let sep3: String = if (!str_eq(act_part, "") || !str_eq(srch_part, "") || !str_eq(scan_part, "")) && !str_eq(affective_part, "") { "\n" } else { "" } - let ctx: String = act_part + sep1 + srch_part + sep2 + scan_part + sep3 + affective_part + let ctx: String = act_part + sep1 + srch_part + sep2 + scan_part // Cache bell and activation results for handle_chat reuse (Issues 2, 7). state_set("engram_compile_bell_node", recent_bell) @@ -489,7 +598,8 @@ fn engram_compile(intent: String) -> String { if str_eq(ctx, "") { return "" } - // Fix Issue 6: cap at a clean JSON object boundary. 
+ // Cap at a clean JSON object boundary — scan back from the 6000-char limit to find + // the last closing brace so we never return a truncated mid-object JSON string. let cap_len: Int = 6000 if str_len(ctx) <= cap_len { return ctx } let cap_search: Int = cap_len - 1 @@ -535,10 +645,14 @@ fn build_system_prompt(ctx: String) -> String { "\n\n[IDENTITY GRAPH — who you are, loaded from your engram]\n" + id_ctx } - let engram_block: String = if str_eq(ctx, "") { + // Fix (Issue #3): render ctx as prose bullets before injecting into prompt. + // engram_compile returns raw JSON arrays/objects; engram_render_ctx converts them + // to "- [TYPE age sal] content" lines the LLM can actually read and reason over. + let rendered_ctx: String = if str_eq(ctx, "") { "" } else { engram_render_ctx(ctx) } + let engram_block: String = if str_eq(rendered_ctx, "") { "" } else { - "\n\n[ENGRAM CONTEXT — compiled from your graph]\n" + ctx + "\n\n[ENGRAM CONTEXT — compiled from your graph]\n" + rendered_ctx } let safety_addendum: String = state_get("layered_cycle_safety_system_addendum") @@ -1280,7 +1394,7 @@ fn handle_chat_agentic(body: String) -> String { if str_eq(screen_action, "hard_bell") { safety_log_bell("hard", json_get(screen_result, "reason"), str_slice(message, 0, 80)) return "{\"reply\":\"" + json_safe(safety_validate("", "hard_bell")) + "\",\"model\":\"\",\"agentic\":true,\"tools_used\":[]}" - + } let req_model: String = json_get(body, "model") let model: String = if str_eq(req_model, "") { chat_default_model() } else { req_model } @@ -1705,7 +1819,14 @@ fn handle_dharma_room_turn(body: String) -> String { // The soul's own memories, activated by what it's reading — not injected. 
// Issue 6 fix: distill_transcript() reduces diffuse embedding noise - let engram_ctx: String = engram_compile(distill_transcript(transcript)) + let engram_ctx_base: String = engram_compile(distill_transcript(transcript)) + // Append the cached bell node once (engram_compile no longer includes it inline + // to avoid duplication when called multiple times — see engram_compile_multi). + let dharma_bell: String = state_get("engram_compile_bell_node") + let engram_ctx: String = if !str_eq(dharma_bell, "") { + let sep: String = if !str_eq(engram_ctx_base, "") { "\n" } else { "" } + engram_ctx_base + sep + dharma_bell + } else { engram_ctx_base } let system_prompt: String = if str_eq(engram_ctx, "") { identity } else { @@ -1758,7 +1879,14 @@ fn handle_dharma_room_turn_agentic(body: String) -> String { } // Issue 6 fix: distill_transcript() reduces diffuse embedding noise - let ctx: String = engram_compile(distill_transcript(transcript)) + let ctx_base: String = engram_compile(distill_transcript(transcript)) + // Append the cached bell node once (engram_compile no longer includes it inline + // to avoid duplication when called multiple times — see engram_compile_multi). + let dharma_bell2: String = state_get("engram_compile_bell_node") + let ctx: String = if !str_eq(dharma_bell2, "") { + let sep: String = if !str_eq(ctx_base, "") { "\n" } else { "" } + ctx_base + sep + dharma_bell2 + } else { ctx_base } let system: String = identity + " You have access to tools: read files, write files, browse the web, search your memory, run commands. Use them when they add genuine value. Be direct and stay in character.\n\n" + ctx let api_key: String = agentic_api_key()