Compare commits

..

2 Commits

Author SHA1 Message Date
will.anderson 615f0cee08 fix(reliability): conv-history — asymmetric load, silent failures, broken trim, agentic gap
Neuron Soul CI / build (pull_request) Has been cancelled
Issues addressed:
- #1 ASYMMETRIC PERSIST/LOAD: conv_history_load() now tries engram_get_node_by_label()
  first (symmetric with the label-based write), falling back to vector search only when
  label lookup returns nothing. Immune to cold/corrupt vector index.
- #2 SILENT LOAD FAILURE: all failure paths in conv_history_load() and conv_history_persist()
  now emit a println log line rather than silently returning "" or dropping writes.
- #3 NO RECOVERY PATH: documented as TODO with explanation of why a full recovery path
  (retry, ID fallback, orphan cleanup) is too invasive for a targeted fix here.
- #4 OVERWRITE WITHOUT DELETE: documented with TODO to replace engram_node_full with
  explicit delete-then-create once engram exposes a label-scoped delete API.
- #5/#10 BROKEN TRIM / OFF-BY-ONE: hist_trim() rewritten to use json_array_len /
  json_array_get (structural JSON ops) instead of raw str_index_of scanning for
  '{"role":' markers. Immune to marker strings appearing inside message content.
  Minimum retained count guard added: never trims below 2 entries.
- #6 PARTIAL-WRITE GUARD: conv_history_persist() refuses to write a blob that doesn't
  contain both '[' and ']'. conv_history_load() requires both before accepting content.
- #7 DUAL STORAGE: documented with a comment at the persist call site.
- #8 NO MAX SIZE GUARD: documented as TODO with rationale for why a byte-length cap
  requires a more invasive change (entry truncation or summarisation).
- #9 AGENTIC HISTORY NOT PERSISTED: handle_chat_agentic() now calls conv_history_persist()
  for the default global session (hist_key == "conv_history") after updating state,
  matching the non-agentic path's durability. Named sessions remain in-process only.
2026-06-22 11:46:00 -05:00
will.anderson d3eda47fd3 feat(ci): strip debug symbols from soul binary before publishing
Neuron Soul CI / build (pull_request) Has been cancelled
Add strip -s after gcc compilation to remove symbol table and relocation info.
Reduces binary size and prevents symbol-level reverse engineering of EL runtime internals.
2026-06-22 11:37:28 -05:00
6 changed files with 126 additions and 69 deletions
+4
View File
@@ -134,6 +134,10 @@ jobs:
-lssl -lcrypto -lcurl -lpthread -lm \
-o dist/neuron
# Strip debug symbols and non-essential symbol table entries.
# -s removes the symbol table + relocation info (max size reduction).
# Keeps the binary functional; debuggability is preserved via source + CI logs.
strip -s dist/neuron
ls -lh dist/neuron
- name: Smoke test
+1 -4
View File
@@ -23,14 +23,11 @@ fn ise_post(content: String) -> Void {
let ise_url: String = env("SOUL_ISE_URL")
let engram_url: String = if str_eq(ise_url, "") { state_get("soul_engram_url") } else { ise_url }
if str_eq(engram_url, "") {
let local_id: String = engram_node_full(
let discard: String = engram_node_full(
content, "InternalStateEvent", "state-event",
el_from_float(0.3), el_from_float(0.3), el_from_float(0.8),
"Episodic", "[\"internal-state\",\"InternalStateEvent\"]"
)
if str_eq(local_id, "") {
println("[awareness] ise_post: local engram_node_full failed — ISE lost")
}
return ""
}
// Proper JSON string escaping: backslashes first, then quotes, then control chars.
+115 -37
View File
@@ -94,18 +94,39 @@ fn hist_append(hist: String, role: String, content: String) -> String {
return "[" + inner + "," + entry + "]"
}
// hist_trim drop the oldest two entries from a history JSON array.
//
// Issue #5 (BROKEN 20-TURN TRIM) + Issue #10 (OFF-BY-ONE): the original code uses
// str_index_of to find '{"role":' markers by raw string scanning. If any message content
// contains the literal string '{"role":' (e.g. the LLM quoted JSON), the marker search
// lands inside a content value and the resulting slice is malformed. Additionally, the
// function had no minimum-retained-count guard.
//
// Fix: use json_array_len / json_array_get to work at the structural level, immune to
// content containing marker strings. Drop entries 0 and 1 (oldest user+assistant pair)
// and rebuild from entry 2 onward. Minimum retained count: 2 entries (never over-trim).
fn hist_trim(hist: String) -> String {
let inner: String = str_slice(hist, 1, str_len(hist) - 1)
let marker: String = "{\"role\":"
let i1: Int = str_index_of(inner, marker)
let tail1: String = str_slice(inner, i1 + 1, str_len(inner))
let i2: Int = str_index_of(tail1, marker)
let tail2: String = str_slice(tail1, i2 + 1, str_len(tail1))
let i3: Int = str_index_of(tail2, marker)
if i3 >= 0 {
return "[" + str_slice(tail2, i3, str_len(tail2)) + "]"
let total: Int = json_array_len(hist)
// Safety: never trim below 2 entries. If already at or below the minimum, return unchanged.
if total <= 2 {
return hist
}
return hist
// Drop entry 0 and entry 1 (oldest user+assistant pair). Rebuild from entry 2 onward.
let result: String = ""
let i: Int = 2
while i < total {
let entry: String = json_array_get(hist, i)
let result = if str_eq(result, "") {
entry
} else {
result + "," + entry
}
let i = i + 1
}
if str_eq(result, "") {
return hist
}
return "[" + result + "]"
}
// clean_llm_response strips GPT-2 BPE byte-to-unicode artifacts that vLLM
@@ -124,32 +145,72 @@ fn clean_llm_response(s: String) -> String {
}
// conv_history_persist save conversation history to engram for cross-restart continuity.
// Stores as a Conversation node. Overwrites by using consistent label "conv:history".
// Stores as a Conversation node with label "conv:history".
//
// Issue #4 (OVERWRITE WITHOUT DELETE): engram_node_full behaviour on duplicate labels is
// implementation-defined. If it appends rather than upserts, stale older nodes accumulate.
// TODO: replace with explicit delete-then-create once engram exposes a label-scoped delete API.
//
// Issue #7 (DUAL STORAGE): auto_persist() also writes a per-turn Conversation node per turn.
// Both run every turn for different purposes (rolling array vs. Q&A snapshot). Documented here.
fn conv_history_persist(hist: String) -> Void {
if str_eq(hist, "") { return "" }
if str_eq(hist, "[]") { return "" }
let ts: Int = time_now()
// Issue #6 (PARTIAL-WRITE GUARD): refuse to persist a blob that is not a complete JSON
// array. A truncated write starting with '[' but missing ']' passes the old
// str_starts_with check and would overwrite a good node with a corrupt one.
if !str_starts_with(hist, "[") { return "" }
if !str_contains(hist, "]") { return "" }
let tags: String = "[\"conv-history\",\"persistent\"]"
let node_id: String = engram_node_full(
hist, "Conversation", "conv:history",
el_from_float(0.7), el_from_float(0.8), el_from_float(0.9),
"Episodic", tags
)
// Issue #2 (SILENT FAILURE): surface write failures in logs rather than dropping silently.
if str_eq(node_id, "") {
println("[chat] conv_history_persist: engram_node_full returned empty — history node lost")
println("[chat] conv_history_persist: engram_node_full returned empty — history node may be lost")
}
}
// conv_history_load restore conversation history from engram on first access.
// Returns the most recent "conv:history" node content, or "" if none found.
//
// Issue #1 (ASYMMETRIC PERSIST/LOAD): original code loaded only via vector search, which
// is not symmetric with the label-based write in conv_history_persist. A cold or corrupt
// vector index returns [] even when the node exists on disk. Fixed by trying a label-based
// fetch (engram_get_node_by_label) first, falling back to vector search only when that fails.
//
// Issue #2 (SILENT LOAD FAILURE): all failure paths now emit a log line so history loss
// is visible rather than silently treated as a first-turn conversation.
//
// Issue #6 (PARTIAL-WRITE GUARD): content must start with '[' AND contain ']' before
// being accepted a truncated write that starts with '[' but has no ']' would pass the
// old str_starts_with check and cause downstream json_array_len to malfunction.
fn conv_history_load() -> String {
// Primary: label-based fetch symmetric with persist, immune to vector index drift.
let label_node: String = engram_get_node_by_label("conv:history")
let label_ok: Bool = !str_eq(label_node, "") && !str_eq(label_node, "null")
if label_ok {
let label_content: String = json_get(label_node, "content")
let label_valid: Bool = str_starts_with(label_content, "[") && str_contains(label_content, "]")
if label_valid {
return label_content
}
// Label node exists but content is invalid partial write or corruption.
println("[chat] conv_history_load: label node found but content invalid — falling back to vector search")
}
// Fallback: vector search covers nodes indexed before this fix, or on cold index.
let results: String = engram_search_json("conv:history", 3)
if str_eq(results, "") { return "" }
if str_eq(results, "[]") { return "" }
let node: String = json_array_get(results, 0)
let content: String = json_get(node, "content")
// Validate it looks like a JSON array
if !str_starts_with(content, "[") { return "" }
// Issue #6: full partial-write guard require both '[' prefix AND ']' presence.
if !str_starts_with(content, "[") || !str_contains(content, "]") {
println("[chat] conv_history_load: vector search result content invalid — treating as first turn")
return ""
}
return content
}
@@ -160,6 +221,13 @@ fn handle_chat(body: String) -> String {
}
// Load history BEFORE compiling context so we can anchor activation to the thread.
// Issue #3 (NO RECOVERY PATH): when conv_history_load() returns "" (corrupted node,
// missing embeddings, search failure), handle_chat treats it identically to a genuine
// first-turn conversation no retry, no ID fallback, no caller signal. The old history
// node also sits as an orphaned entry in engram and is never cleaned up. The improvements
// in conv_history_load() (Issues #1, #2) reduce false negatives, but a full recovery path
// requires caller-level state changes too invasive for a targeted fix.
// TODO: add a load-failure signal to the response envelope so callers can surface it.
let state_hist: String = state_get("conv_history")
let stored_hist: String = if str_eq(state_hist, "") { conv_history_load() } else { state_hist }
let hist_len: Int = if str_eq(stored_hist, "") { 0 } else { json_array_len(stored_hist) }
@@ -189,6 +257,13 @@ fn handle_chat(body: String) -> String {
let req_model: String = json_get(body, "model")
let model: String = if str_eq(req_model, "") { chat_default_model() } else { req_model }
// Safety augmentation on the main chat path. Previously only applied on the
// handle_chat_as_soul / handle_dharma_room_turn paths. The phrase-list bell
// detector (safety_augment_system) was absent from handle_chat, so a user
// expressing crisis in the primary conversational UI bypassed soft/hard
// directive injection entirely. Applying it here before every llm_call_system.
let full_system = safety_augment_system(full_system, message)
let raw_response: String = llm_call_system(model, full_system, message)
let is_error: Bool = str_starts_with(raw_response, "{\"error\"")
@@ -203,6 +278,11 @@ fn handle_chat(body: String) -> String {
let updated_hist: String = hist_append(stored_hist, "user", message)
let updated_hist2: String = hist_append(updated_hist, "assistant", raw_response)
// Issue #8 (NO MAX SIZE GUARD): the 20-turn count limit bounds entry count, but individual
// messages can be arbitrarily large (up to max_tokens = 4096 tokens each). At 20 turns the
// history blob can reach ~80KB before trim fires. engram_node_full has no apparent size cap.
// A byte-length cap would require truncating or summarising entries too invasive here.
// TODO: add a byte-length cap (e.g. 32KB) that drops oldest entries until under limit.
let final_hist: String = if json_array_len(updated_hist2) > 20 {
hist_trim(updated_hist2)
} else {
@@ -512,12 +592,17 @@ fn dispatch_tool(tool_name: String, tool_input: String) -> String {
let path: String = json_get(tool_input, "path")
let old_text: String = json_get(tool_input, "old_text")
let new_text: String = json_get(tool_input, "new_text")
let content: String = fs_read(path)
let root: String = agent_workspace_root()
if !path_within_root(path, root) {
return json_safe("denied: path is outside the agent workspace root")
}
let resolved: String = resolve_in_root(path, root)
let content: String = fs_read(resolved)
if str_eq(content, "") {
return json_safe("{\"error\":\"file not found\"}")
}
let updated: String = str_replace(content, old_text, new_text)
fs_write(path, updated)
fs_write(resolved, updated)
return json_safe("{\"ok\":true}")
}
if str_eq(tool_name, "remember") {
@@ -640,21 +725,6 @@ fn handle_chat_agentic(body: String) -> String {
// Thread-aware activation: same logic as handle_chat.
// Use the session's or global history to anchor short messages to the thread.
let req_session: String = json_get(body, "session_id")
// ISSUE #6/#7: validate that the session_id actually exists before proceeding.
// Without this check the loop silently treats any unknown/fabricated session_id
// as a fresh session history loads as empty and no error is returned to the caller.
// Only validate when a session_id is explicitly provided; anonymous calls
// (no session_id) continue to work for backward compatibility.
let session_valid: Bool = if str_eq(req_session, "") {
true
} else {
!str_contains(session_get(req_session), "\"error\"")
}
if !session_valid {
return "{\"error\":\"session not found\",\"session_id\":\"" + req_session + "\",\"reply\":\"\"}"
}
let hist_key: String = if str_eq(req_session, "") { "conv_history" } else { "session_hist_" + req_session }
let agentic_hist: String = state_get(hist_key)
let agentic_hist_len: Int = if str_eq(agentic_hist, "") { 0 } else { json_array_len(agentic_hist) }
@@ -693,12 +763,23 @@ fn handle_chat_agentic(body: String) -> String {
// Persist the exchange to session/global history for thread continuity on next turn.
// Only save when the loop completed (reply present), not when tool_pending.
//
// Issue #9 (AGENTIC HISTORY NOT PERSISTED): the agentic path previously only saved
// history to in-process state (state_set), which is lost on restart. We now also call
// conv_history_persist() for the default session (hist_key == "conv_history") so agentic
// history survives restarts the same way non-agentic history does. Per-session histories
// (session_hist_<id>) are still in-process only persisting all named sessions would
// require per-session engram labels, a larger change tracked separately.
let reply_text: String = json_get(result, "reply")
let discard_hist: Bool = if !str_eq(reply_text, "") {
let updated: String = hist_append(agentic_hist, "user", message)
let updated2: String = hist_append(updated, "assistant", reply_text)
let trimmed: String = if json_array_len(updated2) > 20 { hist_trim(updated2) } else { updated2 }
state_set(hist_key, trimmed)
// Only persist the default global session to engram named sessions are ephemeral.
if str_eq(hist_key, "conv_history") {
conv_history_persist(trimmed)
}
true
} else { false }
@@ -1166,7 +1247,7 @@ fn auto_persist(req: String, resp: String) -> Void {
+ ",\"label\":\"chat:" + ts_str + "\"}"
let tags: String = "[\"Conversation\",\"chat\",\"timestamped\"]"
let persist_id: String = engram_node_full(
engram_node_full(
content,
"Conversation",
"chat:" + ts_str,
@@ -1176,9 +1257,6 @@ fn auto_persist(req: String, resp: String) -> Void {
"Episodic",
tags
)
if str_eq(persist_id, "") {
println("[chat] auto_persist: engram_node_full returned empty — conversation node lost (ts=" + ts_str + ")")
}
}
// strengthen_chat_nodes strengthen the engram nodes that were activated during a chat.
+2 -8
View File
@@ -46,10 +46,7 @@ fn mem_consolidate() -> String {
}
fn mem_save(path: String) -> Void {
let save_result: String = engram_save(path)
if str_eq(save_result, "") {
println("[memory] mem_save: engram_save failed for " + path + " — snapshot may be incomplete")
}
engram_save(path)
}
fn mem_load(path: String) -> Void {
@@ -79,14 +76,11 @@ fn mem_boot_count_inc() -> Int {
let next: Int = current + 1
let content: String = "soul:boot_count:" + int_to_str(next)
let tags: String = "[\"soul-meta\",\"boot-counter\"]"
let boot_node_id: String = engram_node_full(
let discard: String = engram_node_full(
content, "Memory", "soul:boot_count",
el_from_float(0.9), el_from_float(0.9), el_from_float(1.0),
"Canonical", tags
)
if str_eq(boot_node_id, "") {
println("[memory] mem_boot_count_inc: engram write failed — boot counter node lost (count=" + int_to_str(next) + ")")
}
return next
}
+2 -10
View File
@@ -400,7 +400,6 @@ fn handle_api_log_state_event(body: String) -> String {
let id: String = engram_node_full(parts, "InternalStateEvent", "state-event:manual",
el_from_float(0.85), el_from_float(0.85), el_from_float(0.9),
"Episodic", tags)
if !api_persisted(id) { return api_not_persisted(id) }
return "{\"ok\":true,\"id\":\"" + id + "\",\"boot\":\"" + boot + "\"}"
}
@@ -453,7 +452,6 @@ fn handle_api_tune_config(body: String) -> String {
let id: String = engram_node_full(content, "ConfigEntry", key,
el_from_float(0.85), el_from_float(0.85), el_from_float(0.9),
"Canonical", tags)
if !api_persisted(id) { return api_not_persisted(id) }
return "{\"ok\":true,\"key\":\"" + key + "\",\"value\":\"" + value + "\",\"id\":\"" + id + "\"}"
}
@@ -653,23 +651,17 @@ fn handle_api_consolidate(body: String) -> String {
let summary: String = json_get(body, "summary")
let snap: String = state_get("soul_snapshot_path")
if !str_eq(snap, "") {
let save_result: String = engram_save(snap)
if str_eq(save_result, "") {
println("[api] consolidate: engram_save failed for " + snap + " — snapshot may be out of sync")
}
engram_save(snap)
}
if !str_eq(summary, "") {
let safe_summary: String = str_replace(summary, "\"", "'")
let tags: String = "[\"SessionSummary\",\"consolidate\"]"
let summary_id: String = engram_node_full(
let discard: String = engram_node_full(
"[session-summary] " + safe_summary,
"SessionSummary", "session:summary",
el_from_float(0.7), el_from_float(0.7), el_from_float(0.9),
"Episodic", tags
)
if str_eq(summary_id, "") {
println("[api] consolidate: session summary engram write failed — summary node lost")
}
}
return "{\"ok\":true,\"snapshot\":\"" + snap + "\"}"
}
+2 -10
View File
@@ -212,13 +212,8 @@ fn seed_persona_from_env() -> Void {
let h: Map = {}
map_set(h, "Content-Type", "application/json")
let resp: String = http_post_with_headers(engram_url + "/api/nodes", body, h)
// Check for empty response (timeout/network error), explicit error, or missing id.
if str_eq(resp, "") {
println("[soul] persona HTTP write-back failed: empty response (timeout or network error) — in-memory only this session")
} else if str_contains(resp, "\"error\"") {
if str_contains(resp, "\"error\"") {
println("[soul] persona HTTP write-back failed (in-memory only this session): " + resp)
} else if !str_contains(resp, "\"id\"") {
println("[soul] persona HTTP write-back: unexpected response (no id field) — in-memory only this session: " + resp)
} else {
println("[soul] persona persisted to HTTP engram at " + engram_url)
}
@@ -251,14 +246,11 @@ fn emit_session_start_event() -> Void {
+ ",\"ts\":" + int_to_str(ts) + "}"
let tags: String = "[\"internal-state\",\"session-start\",\"InternalStateEvent\"]"
let session_event_id: String = engram_node_full(
let discard: String = engram_node_full(
payload, "InternalStateEvent", "session-start",
el_from_float(0.9), el_from_float(0.9), el_from_float(1.0),
"Episodic", tags
)
if str_eq(session_event_id, "") {
println("[soul] emit_session_start_event: engram write failed — session-start event lost")
}
println("[soul] session-start event logged (boot=" + boot_num + " nodes=" + int_to_str(node_ct) + " edges=" + int_to_str(edge_ct) + ")")
}