Compare commits

...

1 Commits

Author SHA1 Message Date
will.anderson 978a6812d7 fix(recall): address all remaining code review issues
Issue 1 (CRITICAL): Fix auto_persist brace structure. The closing brace for
the is_bell block was missing, causing the conv_node_id error-log check to
be unreachable dead code inside the if block and silently breaking
strengthen_chat_nodes. Add the missing } to close the is_bell block before
the conv_node_id guard.

Issue 2 (CRITICAL): Restore session_exists() call in handle_chat_agentic.
The behavioral regression replacing session_exists() with
!str_contains(session_get(...), '"error"') was reverted. session_get()
returns valid JSON for any non-empty session ID (including fabricated ones),
so the check always passed. session_exists() does a proper state-index and
engram search.

Issue 3 (HIGH): Extend sentinel field cleanup in engram_compile_ranked from
_sel_14 to _sel_39. The recall-boost path passes a 40-candidate pool
(search_json=40) so nodes at positions 15-39 produced _sel_N sentinels that
leaked into the LLM context prompt. Cleanup chain now covers all 40 indices.

Issue 4 (HIGH): Fix engram_is_continuation false positives. Remove How, Why,
When, Where, and What about from the continuation-opener list as these
commonly introduce new topics. Remove the 80-char length fallback which
incorrectly classified any short message (including new-topic questions like
'What is quantum computing?') as a continuation.

Issue 5 (HIGH): Rewrite hist_trim_with_bell_guard to use json_array_get for
structural parsing, matching the fix already applied to hist_trim. The old
str_index_of('{"role":') pattern could corrupt history when message content
contained that literal string. The function now delegates the actual trim to
hist_trim() after the bell-preservation check.

Issue 6 (NORMAL): Fix entity_count scoping in engram_extract_entities. Move
the entity_count increment to the while-body level as an if-expression
assignment so it escapes the if-expression branch scope and the < 10 guard
actually terminates the loop early.

Issue 7 (NORMAL): Fix mcp_call_seq race in call_mcp_bridge. Replace the
non-atomic time+seq temp file path with uuid_v4() for collision-free
uniqueness under concurrent load, matching the approach used by
next_bridge_id().

Issue 8 (NORMAL): Fix safe JSON truncation for combined main_part + affective
array format. When ctx is '[array]\n{bell_object}' and truncation falls
inside the affective single-object portion, the old code appended ']'
producing invalid JSON. Now detects the newline separator and drops only the
partial affective object, returning the complete main array.

Issue 9 (NORMAL): Handle 4th+ topics in engram_compile. engram_split_topics
is recursive and can produce more than 3 newline-separated segments. Add a
nodes3 pass that collects all topic text after the third segment as one
combined search, and include it in the merge chain so no topics are silently
dropped.
2026-06-22 13:36:41 -05:00
+100 -46
View File
@@ -119,7 +119,32 @@ fn engram_compile_ranked(nodes_json: String, max_nodes: Int) -> String {
let c12: String = str_replace(c11, "\"_sel_12\":1,", "")
let c13: String = str_replace(c12, "\"_sel_13\":1,", "")
let c14: String = str_replace(c13, "\"_sel_14\":1,", "")
return c14
let c15: String = str_replace(c14, "\"_sel_15\":1,", "")
let c16: String = str_replace(c15, "\"_sel_16\":1,", "")
let c17: String = str_replace(c16, "\"_sel_17\":1,", "")
let c18: String = str_replace(c17, "\"_sel_18\":1,", "")
let c19: String = str_replace(c18, "\"_sel_19\":1,", "")
let c20: String = str_replace(c19, "\"_sel_20\":1,", "")
let c21: String = str_replace(c20, "\"_sel_21\":1,", "")
let c22: String = str_replace(c21, "\"_sel_22\":1,", "")
let c23: String = str_replace(c22, "\"_sel_23\":1,", "")
let c24: String = str_replace(c23, "\"_sel_24\":1,", "")
let c25: String = str_replace(c24, "\"_sel_25\":1,", "")
let c26: String = str_replace(c25, "\"_sel_26\":1,", "")
let c27: String = str_replace(c26, "\"_sel_27\":1,", "")
let c28: String = str_replace(c27, "\"_sel_28\":1,", "")
let c29: String = str_replace(c28, "\"_sel_29\":1,", "")
let c30: String = str_replace(c29, "\"_sel_30\":1,", "")
let c31: String = str_replace(c30, "\"_sel_31\":1,", "")
let c32: String = str_replace(c31, "\"_sel_32\":1,", "")
let c33: String = str_replace(c32, "\"_sel_33\":1,", "")
let c34: String = str_replace(c33, "\"_sel_34\":1,", "")
let c35: String = str_replace(c34, "\"_sel_35\":1,", "")
let c36: String = str_replace(c35, "\"_sel_36\":1,", "")
let c37: String = str_replace(c36, "\"_sel_37\":1,", "")
let c38: String = str_replace(c37, "\"_sel_38\":1,", "")
let c39: String = str_replace(c38, "\"_sel_39\":1,", "")
return c39
}
// engram_split_topics split message into sub-queries on explicit conjunctions.
@@ -170,9 +195,11 @@ fn engram_extract_entities(message: String) -> String {
let already_have: Bool = str_contains(entities, word)
let should_add: Bool = is_capital && !is_stop && !already_have && word_len >= 3
let entities = if should_add {
let entity_count = entity_count + 1
if str_eq(entities, "") { word } else { entities + "\n" + word }
} else { entities }
// Increment entity_count at the while-body level so the binding escapes the
// if-expression scope and the entity_count < 10 guard actually terminates early.
let entity_count = if should_add { entity_count + 1 } else { entity_count }
let pos = if wend > pos { wend + 1 } else { pos + 1 }
}
return entities
@@ -201,10 +228,12 @@ fn engram_detect_recall_intent(message: String) -> Bool {
|| str_contains(message, "what happened with")
}
// engram_is_continuation semantic continuation detection replacing the brittle 50-char
// threshold. Returns true when message starts with a pronoun, continuation opener, or is
// < 80 chars (raised from 50 to catch "Can you remind me what Prism's architecture
// looks like?" at 57 chars which is clearly a continuation in an active thread).
// engram_is_continuation detect whether a message continues the active thread.
// Returns true only when the message starts with a pronoun or an unambiguous
// discourse continuation marker. Does NOT classify by message length: short messages
// that introduce a new topic (e.g. "What is quantum computing?") are not continuations.
// Does NOT classify interrogative starters (How, Why, When, Where, What about) as
// continuations these commonly open new topics and the false-positive cost is too high.
fn engram_is_continuation(message: String, hist_len: Int) -> Bool {
if hist_len <= 0 { return false }
let has_pronoun: Bool = str_starts_with(message, "It ")
@@ -216,6 +245,7 @@ fn engram_is_continuation(message: String, hist_len: Int) -> Bool {
|| str_starts_with(message, "She ") || str_starts_with(message, "she ")
|| str_starts_with(message, "We ") || str_starts_with(message, "we ")
if has_pronoun { return true }
// Only unambiguous discourse connectors that cannot open a new topic on their own.
let is_cont_opener: Bool = str_starts_with(message, "Go on")
|| str_starts_with(message, "go on")
|| str_starts_with(message, "Continue") || str_starts_with(message, "continue")
@@ -224,12 +254,7 @@ fn engram_is_continuation(message: String, hist_len: Int) -> Bool {
|| str_starts_with(message, "Ok") || str_starts_with(message, "ok")
|| str_starts_with(message, "And ") || str_starts_with(message, "and ")
|| str_starts_with(message, "But ") || str_starts_with(message, "but ")
|| str_starts_with(message, "What about") || str_starts_with(message, "what about")
|| str_starts_with(message, "Why ") || str_starts_with(message, "why ")
|| str_starts_with(message, "How ") || str_starts_with(message, "how ")
|| str_starts_with(message, "When ") || str_starts_with(message, "when ")
if is_cont_opener { return true }
if str_len(message) < 80 { return true }
return false
}
@@ -306,6 +331,26 @@ fn engram_compile(intent: String) -> String {
}
} else { "" }
// Fourth+ topic segments: engram_split_topics is recursive and can produce 4+ lines.
// Rather than hardcoding each topic index, collect everything after the third topic
// as a single combined search query so no segments are silently dropped.
// This handles inputs like "a and b and c and d" (4 topics).
let nodes3: String = if has_multi_topic {
let nl0: Int = str_index_of(topics, "\n")
let rest1: String = str_slice(topics, nl0 + 1, str_len(topics))
let nl1: Int = str_index_of(rest1, "\n")
if nl1 < 0 { "" } else {
let rest2: String = str_slice(rest1, nl1 + 1, str_len(rest1))
let nl2: Int = str_index_of(rest2, "\n")
if nl2 < 0 { "" } else {
// Remainder after the third segment may span one or more topics.
// Search with the remaining text as-is; engram_compile_multi handles it.
let rest3: String = str_slice(rest2, nl2 + 1, str_len(rest2))
if str_eq(rest3, "") { "" } else { engram_compile_multi(rest3) }
}
}
} else { "" }
// Issue 2 cont.: entity 0 dedicated search (15 candidates, ranked 6).
let entity_nodes0: String = if has_entities {
let nl_e0: Int = str_index_of(entity_list, "\n")
@@ -342,6 +387,7 @@ fn engram_compile(intent: String) -> String {
// Merge all pools, deduplicating at each step.
let merged: String = engram_nodes_merge(nodes0, nodes1)
let merged: String = engram_nodes_merge(merged, nodes2)
let merged: String = engram_nodes_merge(merged, nodes3)
let merged: String = engram_nodes_merge(merged, entity_nodes0)
let merged: String = engram_nodes_merge(merged, entity_nodes1)
let merged: String = engram_nodes_merge(merged, recall_boost)
@@ -388,8 +434,16 @@ fn engram_compile(intent: String) -> String {
if str_eq(ctx, "") { return "" }
// Issue 7 fix: safe JSON truncation find last closing brace before budget cap.
// Safe JSON truncation find last closing brace before budget cap.
// Budget raised from 6000 to 8000 for the larger multi-topic pool.
//
// Issue 8 fix: ctx may be main_part (JSON array) + "\n" + affective_part (single JSON
// object). When truncation cuts into the affective_part, appending "]" would produce
// "[array_content]{partial_bell_node}]" not valid JSON. Guard: only append "]" when
// the truncation point falls strictly within the main array portion (before the "\n"
// separator). If the cut falls in the affective part, drop the partial object entirely
// and return only the complete main array. If there is no separator (ctx is a plain
// array with no affective part), the original append-"]" behaviour applies.
let budget: Int = 8000
if str_len(ctx) <= budget { return ctx }
let search_end: Int = budget - 1
@@ -403,7 +457,18 @@ fn engram_compile(intent: String) -> String {
}
if found_pos < 0 { return str_slice(ctx, 0, budget) }
let truncated: String = str_slice(ctx, 0, found_pos + 1)
if str_starts_with(ctx, "[") { return truncated + "]" }
if str_starts_with(ctx, "[") {
// Determine whether this ctx has a separate affective object appended after the array.
// The format is: main_array + "\n" + bell_object. Find the array boundary.
let nl_pos: Int = str_index_of(ctx, "\n")
let has_affective_sep: Bool = nl_pos > 0 && nl_pos < str_len(ctx) - 1
if has_affective_sep && found_pos > nl_pos {
// Truncation fell inside the affective_part drop it and return just the main array.
return str_slice(ctx, 0, nl_pos)
}
// Truncation is within the main array close it properly.
return truncated + "]"
}
return truncated
}
fn json_safe(s: String) -> String {
@@ -505,23 +570,19 @@ fn hist_trim(hist: String) -> String {
// a bell event. If it did, write a preservation node to engram so the distress exchange
// survives the 20-turn window. The LLM window drops it; engram retains it permanently
// and engram_compile will surface it again via the affective context path.
//
// Fix: use json_array_get for structural parsing (immune to {"role": appearing in
// message content) same fix applied to hist_trim. The old str_index_of("{\"role\":")
// pattern could corrupt history when any message contained that literal string.
fn hist_trim_with_bell_guard(hist: String) -> String {
// Extract the first turn (should be a user message) to inspect it.
let inner: String = str_slice(hist, 1, str_len(hist) - 1)
let marker: String = "{\"role\":"
let i1: Int = str_index_of(inner, marker)
// i1 is the start of the first entry within inner.
// Find where the second entry begins to delimit the first entry's JSON.
let tail1: String = str_slice(inner, i1 + 1, str_len(inner))
let i2: Int = str_index_of(tail1, marker)
// The first entry spans from i1 to (i1 + 1 + i2 - 1) within inner.
let first_entry_raw: String = if i2 > 0 {
str_slice(inner, i1, i1 + 1 + i2 - 1)
} else {
str_slice(inner, i1, str_len(inner))
}
let first_role: String = json_get(first_entry_raw, "role")
let first_content: String = json_get(first_entry_raw, "content")
let total: Int = json_array_len(hist)
// Safety: never trim below 2 entries.
if total <= 2 { return hist }
// Extract the first entry structurally immune to content containing {"role":
let first_entry: String = json_array_get(hist, 0)
let first_role: String = json_get(first_entry, "role")
let first_content: String = json_get(first_entry, "content")
// Only inspect user turns assistant content doesn't carry bell signals.
let bell_level: String = if str_eq(first_role, "user") {
@@ -554,13 +615,9 @@ fn hist_trim_with_bell_guard(hist: String) -> String {
)
}
// Now perform the standard trim (drop oldest 2 entries = 1 user + 1 assistant pair).
let tail2: String = str_slice(tail1, i2 + 1, str_len(tail1))
let i3: Int = str_index_of(tail2, marker)
if i3 >= 0 {
return "[" + str_slice(tail2, i3, str_len(tail2)) + "]"
}
return hist
// Now perform the standard trim: drop entries 0 and 1 (oldest user+assistant pair).
// Reuse hist_trim's structural approach rebuild from entry 2 onward.
return hist_trim(hist)
}
// clean_llm_response strips GPT-2 BPE byte-to-unicode artifacts that vLLM
@@ -1022,15 +1079,11 @@ fn agentic_tools_all() -> String {
fn call_mcp_bridge(tool_name: String, tool_input: String) -> String {
let eff_input: String = if str_eq(tool_input, "") { "{}" } else { tool_input }
let body: String = "{\"name\":\"" + tool_name + "\",\"input\":" + eff_input + "}"
// Issue #12: previously used a fixed path /tmp/neuron-mcp-call.json.
// Under concurrent load (64 worker threads), two simultaneous MCP tool calls
// race on this file one call sends the other's input to the bridge.
// Fix: monotonic sequence counter makes the path unique per call.
let mcp_seq_s: String = state_get("mcp_call_seq")
let mcp_seq_n: Int = if str_eq(mcp_seq_s, "") { 0 } else { str_to_int(mcp_seq_s) }
let mcp_seq_next: Int = mcp_seq_n + 1
state_set("mcp_call_seq", int_to_str(mcp_seq_next))
let tmp: String = "/tmp/neuron-mcp-call-" + int_to_str(time_now()) + "-" + int_to_str(mcp_seq_next) + ".json"
// Issue #12: previously used a fixed path /tmp/neuron-mcp-call.json, then a
// time+seq path that still raced (time_now() is 1s granularity; non-atomic seq RMW).
// Fix: uuid_v4() provides collision-free uniqueness regardless of concurrency
// same approach used by next_bridge_id(). No state read/write needed.
let tmp: String = "/tmp/neuron-mcp-call-" + uuid_v4() + ".json"
fs_write(tmp, body)
return exec_capture("curl -s --max-time 30 -X POST http://127.0.0.1:7771/mcp/call -H 'Content-Type: application/json' -d @" + tmp)
}
@@ -1374,7 +1427,7 @@ fn handle_chat_agentic(body: String) -> String {
let session_valid: Bool = if str_eq(req_session, "") {
true
} else {
!str_contains(session_get(req_session), "\"error\"")
session_exists(req_session)
}
if !session_valid {
return "{\"error\":\"session not found\",\"session_id\":\"" + req_session + "\",\"reply\":\"\"}"
@@ -2014,6 +2067,7 @@ fn auto_persist(req: String, resp: String) -> Void {
"session_bell_signal:" + sess_id
}
state_set(signal_key, safe_summary)
}
if str_eq(conv_node_id, "") {
println("[chat] auto_persist: engram_node_full returned empty — conversation node lost (ts=" + ts_str + ")")
}