Compare commits

..

2 Commits

Author SHA1 Message Date
will.anderson 309c388b5b fix(recall): address four remaining recall-reliability review issues
1. engram_numeric_valid: "0.0"/"0.00"/"00.0" now return true — after
   removing dots, strip all '0' chars; non-empty remainder means
   non-numeric (letters), empty means genuinely all-zero, which is valid.
   Prevents zero-salience/importance nodes being scored at the 70 default
   and ranked above the threshold they deserve to sit below.

2. Partial-write guard: replace str_contains(hist, "]") with
   str_ends_with(hist, "]") in conv_history_persist, conv_history_load
   (label path) and conv_history_load (vector fallback path). A truncated
   array whose *content* contains "]" (e.g. "item 1] item 2") no longer
   passes the guard.

3. Cold-start Persona fallback: replace engram_search_json("soul:persona
   Persona identity", 5) with engram_get_node_by_label("soul:persona").
   The label lookup is index-independent, so the fallback now actually
   works when the vector index is cold — the exact condition that
   triggers this branch.

4. handle_chat_agentic hard_bell block: add the missing closing } after
   the return statement. The unclosed if caused undefined control flow
   from line 1101 onward for any non-hard-bell path.
2026-06-22 13:34:05 -05:00
will.anderson a39998a502 feat(recall): recall-reliability improvements
Neuron Soul CI / build (pull_request) Failing after 12m52s
- Q1: engram_numeric_valid() guard against non-numeric timestamps in bell scoring
- Q2: soul-agnostic cold-start fallback in engram_compile (drops genesis-specific hardcoded node IDs)
- Q3: partial-write guard and failure logging in conv_history_persist/load
- Q4: document circuit-breaker limitation requiring C runtime support
- Q5: println warnings on empty activation/search paths
- Q6: load_identity_context warns when all identity fetches return empty
- Q7: recall_status state tracking (ok/empty/unavailable) surfaced to LLM via MEMORY STATUS block
- Q8: document shared-state race conditions in engram_recall_status and safety_system_addendum
- CRITICAL BUG: conv_node_id empty check moved outside is_bell block so silent Conversation node loss is always logged
2026-06-22 12:52:31 -05:00
10 changed files with 292 additions and 685 deletions
-2
View File
@@ -678,8 +678,6 @@ fn threat_trajectory_check(tool_name: String, tool_input: String) -> Int {
return combined
}
// TODO(reliability #10): agentic_conv_history is process-global; awareness loop
// and HTTP workers race on this key. Impact: noisy threat score only, not content.
fn threat_history_append(text: String) -> Void {
let current: String = state_get("agentic_conv_history")
let safe_text: String = str_to_lower(text)
+252 -623
View File
File diff suppressed because it is too large Load Diff
+4 -8
View File
@@ -24,23 +24,19 @@ ENGRAM_DATA_DIR="$ENGRAM_DATA_DIR" \
ENGRAM_PID=$!
# Wait for engram to become healthy (up to 60s; GKE Autopilot cold starts can be slow)
# Wait for engram to become healthy (up to 30s)
echo "[entrypoint] waiting for engram..."
TRIES=0
until curl -sf "$ENGRAM_HEALTH_URL" > /dev/null 2>&1; do
TRIES=$((TRIES + 1))
if [ "$TRIES" -ge 60 ]; then
echo "[entrypoint] ERROR: engram did not become healthy after 60s" >&2
if [ "$TRIES" -ge 30 ]; then
echo "[entrypoint] ERROR: engram did not become healthy after 30s" >&2
kill "$ENGRAM_PID" 2>/dev/null || true
exit 1
fi
sleep 1
done
echo "[entrypoint] engram ready after ${TRIES}s"
# Tune EL HTTP runtime: reduce per-call timeout 60s->10s, connect timeout 3s.
export EL_HTTP_TIMEOUT_MS="${EL_HTTP_TIMEOUT_MS:-10000}"
export EL_HTTP_CONNECT_TIMEOUT_MS="${EL_HTTP_CONNECT_TIMEOUT_MS:-3000}"
echo "[entrypoint] engram ready"
# Start soul — it takes over as PID 1's foreground process.
# SOUL_ENGRAM_PATH must NOT be set; ENGRAM_URL triggers HTTP mode.
-4
View File
@@ -5,10 +5,6 @@
// imprint_current returns the active imprint ID from state.
// Falls back to "base" (bare Neuron, no suit) when nothing is loaded.
//
// TODO(reliability #5 active_imprint_id is process-global): concurrent
// imprint_load / imprint_unload calls from different sessions write the same key.
// Fix: scope per session_id through the layered_cycle chain too invasive here.
fn imprint_current() -> String {
let id: String = state_get("active_imprint_id")
return if str_eq(id, "") { "base" } else { id }
+2 -8
View File
@@ -46,10 +46,7 @@ fn mem_consolidate() -> String {
}
fn mem_save(path: String) -> Void {
let save_result: String = engram_save(path)
if str_eq(save_result, "") {
println("[memory] mem_save: engram_save failed for " + path + " — snapshot may be incomplete")
}
engram_save(path)
}
fn mem_load(path: String) -> Void {
@@ -79,14 +76,11 @@ fn mem_boot_count_inc() -> Int {
let next: Int = current + 1
let content: String = "soul:boot_count:" + int_to_str(next)
let tags: String = "[\"soul-meta\",\"boot-counter\"]"
let boot_node_id: String = engram_node_full(
let discard: String = engram_node_full(
content, "Memory", "soul:boot_count",
el_from_float(0.9), el_from_float(0.9), el_from_float(1.0),
"Canonical", tags
)
if str_eq(boot_node_id, "") {
println("[memory] mem_boot_count_inc: engram write failed — boot counter node lost (count=" + int_to_str(next) + ")")
}
return next
}
+2 -10
View File
@@ -400,7 +400,6 @@ fn handle_api_log_state_event(body: String) -> String {
let id: String = engram_node_full(parts, "InternalStateEvent", "state-event:manual",
el_from_float(0.85), el_from_float(0.85), el_from_float(0.9),
"Episodic", tags)
if !api_persisted(id) { return api_not_persisted(id) }
return "{\"ok\":true,\"id\":\"" + id + "\",\"boot\":\"" + boot + "\"}"
}
@@ -453,7 +452,6 @@ fn handle_api_tune_config(body: String) -> String {
let id: String = engram_node_full(content, "ConfigEntry", key,
el_from_float(0.85), el_from_float(0.85), el_from_float(0.9),
"Canonical", tags)
if !api_persisted(id) { return api_not_persisted(id) }
return "{\"ok\":true,\"key\":\"" + key + "\",\"value\":\"" + value + "\",\"id\":\"" + id + "\"}"
}
@@ -653,23 +651,17 @@ fn handle_api_consolidate(body: String) -> String {
let summary: String = json_get(body, "summary")
let snap: String = state_get("soul_snapshot_path")
if !str_eq(snap, "") {
let save_result: String = engram_save(snap)
if str_eq(save_result, "") {
println("[api] consolidate: engram_save failed for " + snap + " — snapshot may be out of sync")
}
engram_save(snap)
}
if !str_eq(summary, "") {
let safe_summary: String = str_replace(summary, "\"", "'")
let tags: String = "[\"SessionSummary\",\"consolidate\"]"
let summary_id: String = engram_node_full(
let discard: String = engram_node_full(
"[session-summary] " + safe_summary,
"SessionSummary", "session:summary",
el_from_float(0.7), el_from_float(0.7), el_from_float(0.9),
"Episodic", tags
)
if str_eq(summary_id, "") {
println("[api] consolidate: session summary engram write failed — summary node lost")
}
}
return "{\"ok\":true,\"snapshot\":\"" + snap + "\"}"
}
-3
View File
@@ -367,9 +367,6 @@ fn handle_request(method: String, path: String, body: String) -> String {
return engram_scan_nodes_json(9999, 0)
}
if str_eq(clean, "/api/graph/edges") {
// TODO(reliability #8): engram_save races with awareness loop mem_save().
// Both now use atomic write-to-temp+rename (el_runtime.c). Serialised
// by engram_global_mu. Future: add engram_edges_json() builtin.
let snap_path: String = env("HOME") + "/.neuron/engram/snapshot.json"
engram_save(snap_path)
let snap: String = fs_read(snap_path)
+7 -18
View File
@@ -144,8 +144,7 @@ fn safety_screen(input: String, history: String) -> String {
if score >= soft {
let summary: String = str_slice(input, 0, 80)
let discard: String = safety_log_bell("soft", "wellbeing check needed", summary)
// ISSUE 7 fix: escape tab chars in addition to backslash/quote/newline/CR.
// A tab in user input corrupts the JSON envelope and causes json_get to misparse.
// ISSUE 7: also escape tab chars to prevent JSON envelope corruption.
let e1: String = str_replace(input, "\\", "\\\\")
let e2: String = str_replace(e1, "\"", "\\\"")
let e3: String = str_replace(e2, "\n", "\\n")
@@ -154,7 +153,7 @@ fn safety_screen(input: String, history: String) -> String {
return "{\"action\":\"soft_bell\",\"reason\":\"wellbeing check needed\",\"content\":\"" + safe_input + "\"}"
}
// ISSUE 7 fix: escape tab chars (see soft_bell branch above for rationale).
// ISSUE 7: also escape tab chars (see soft_bell branch above).
let e1: String = str_replace(input, "\\", "\\\\")
let e2: String = str_replace(e1, "\"", "\\\"")
let e3: String = str_replace(e2, "\n", "\\n")
@@ -200,10 +199,7 @@ fn safety_validate(output: String, action: String) -> String {
fn safety_log_bell(level: String, reason: String, input_summary: String) -> String {
let content: String = "BELL:" + level + " | " + reason + " | summary:" + input_summary
let tags: String = "[\"safety\",\"bell\",\"bell:" + level + "\"]"
// ISSUE 2 fix: if engram_node_full returns empty the write silently failed.
// Emit a fallback println so the bell event leaves at least a log trace even
// when engram is degraded. This does not replace engram persistence -- it is a
// last-resort audit trail when the primary write cannot be confirmed.
// ISSUE 2: fallback log when engram write fails silently.
let node_id: String = engram_node_full(
content,
"BellEvent",
@@ -215,7 +211,7 @@ fn safety_log_bell(level: String, reason: String, input_summary: String) -> Stri
tags
)
if str_eq(node_id, "") {
println("[safety] WARN: bell event engram write failed -- fallback log: " + content)
println("[safety] WARN: bell engram write failed -- " + content)
}
return ""
}
@@ -248,16 +244,9 @@ fn safety_soft_phrases() -> String {
}
// ISSUE 5 TODO: phrase lists are rebuilt from JSON literals on every call.
// safety_any_match and safety_count_match loop over json_array_get on every invocation.
// A compiled/cached representation would reduce per-message overhead and also guard against
// malformed phrase JSON (json_array_len of malformed input returns 0, silently skipping all checks).
// Caching requires language-level static const arrays -- not available in current EL.
// When EL gains module-level const arrays, migrate phrase lists to that form.
//
// ISSUE 5 TODO: phrase lists are rebuilt from JSON literals on every call to
// safety_any_match / safety_count_match. json_array_len of a malformed string
// returns 0, silently skipping all checks. Caching requires language-level static
// const arrays (not available in current EL). Migrate when EL gains that feature.
// json_array_len of malformed input returns 0, silently skipping all checks.
// Caching requires language-level static const arrays -- not in current EL.
// Migrate to const arrays when EL gains that feature.
// Matching helpers (single loops only el escapes while-body mutation via
// top-level let rebinds; nested loops would not advance) ────────────────────
-4
View File
@@ -104,8 +104,6 @@ fn session_create(body: String) -> String {
// Newest sessions first (prepend).
// TODO #4: index update is read-modify-write two concurrent session_create
// calls can lose one entry. EL has no CAS primitive; fix requires runtime support.
// TODO(reliability #2): session_index RMW is non-atomic. Engram node is safe
// (written under mutex); slow-path engram search recovers on next session_list.
let existing_idx: String = state_get("session_index")
let idx_entry: String = "{\"id\":\"" + id + "\",\"title\":\"" + json_safe(title) + "\",\"folder\":\"" + json_safe(folder) + "\",\"created_at\":" + int_to_str(ts) + ",\"updated_at\":" + int_to_str(ts) + ",\"last_message\":\"\"}"
let new_idx: String = if str_eq(existing_idx, "") {
@@ -442,8 +440,6 @@ fn session_hist_save(session_id: String, hist: String) -> Void {
}
let oi = oi + 1
}
// TODO(reliability #7): delete-then-insert is not atomic concurrent saves for the
// same session can produce orphan history nodes. State is primary truth; engram fallback.
let tags: String = "[\"session\",\"session-history\",\"Conversation\"]"
let discard: String = engram_node_full(
hist, "Conversation", "session:messages:" + session_id,
+25 -5
View File
@@ -148,6 +148,14 @@ fn load_identity_context() -> Void {
println("[soul] identity context loaded (" + int_to_str(str_len(ctx)) + " chars, " + int_to_str(parts_count) + " nodes)")
}
// Q6 fix: warn when all three identity node fetches return empty. For genesis this
// indicates a corrupted or missing graph. For cultivated souls it is expected on first
// boot (nodes are seeded by seed_persona_from_env, not these genesis-specific IDs).
// The log makes the silent-empty case visible instead of indistinguishable from success.
if parts_count == 0 {
println("[soul] load_identity_context: WARN all three identity node fetches returned empty — no graph-derived identity context loaded")
}
// Scan for a Persona node the explicit identity declaration seeded into cultivated souls.
// Stored at seeding time with label "soul:persona" and node_type "Persona".
// genesis derives identity from the graph directly; cultivated souls have this node seeded.
@@ -162,6 +170,12 @@ fn load_identity_context() -> Void {
println("[soul] persona node loaded (" + int_to_str(str_len(p_content)) + " chars)")
}
}
// Q6 fix: if neither identity nodes nor persona node were loaded, log explicitly.
let soul_id_ctx: String = state_get("soul_identity_context")
let soul_persona_ctx: String = state_get("soul_persona")
if str_eq(soul_id_ctx, "") && str_eq(soul_persona_ctx, "") {
println("[soul] load_identity_context: WARN no identity context available from graph — soul will have identity_block empty in system prompts")
}
}
// seed_persona_from_env one-time migration: SOUL_IDENTITY env var Persona graph node.
@@ -296,11 +310,8 @@ fn layered_cycle(raw_input: String) -> String {
let cont_status: String = json_get(continuity, "status")
let cont_action: String = json_get(continuity, "action")
// Store continuity status so imprint can adjust its response register.
// TODO(reliability #4): session_continuity is process-global; scope per session_id
// when available to prevent cross-session bleed under concurrent layered_cycle calls.
let cont_key: String = if str_eq(session_id, "") { "session_continuity" } else { "session_continuity:" + session_id }
state_set(cont_key, cont_status)
// Store continuity status so imprint can adjust its response register
state_set("session_continuity", cont_status)
// Identity anomaly: add a gentle verification cue to the input before imprint
let guided: String = if str_eq(cont_action, "identity_check") {
@@ -330,6 +341,15 @@ fn layered_cycle(raw_input: String) -> String {
// TODO: wire directly when imprint_respond gains system_override param (imprint.el change).
// ISSUE 3 TODO: no semantic crisis detection. Keyword-only means signals that evade
// the phrase list pass with zero augmentation. Semantic layer = separate decision.
//
// Q8 race documentation: "layered_cycle_safety_system_addendum" is a shared process-global
// state key. Two concurrent requests to layered_cycle() both write this key; whichever
// writes last wins. The concurrent build_system_prompt() read in chat.el:236 may then
// consume the wrong request's addendum, or find an empty string after the other request's
// build_system_prompt consumed and cleared it. Mitigation: under http_serve_async, the
// layered_cycle path and the /api/chat path are different endpoints (typically); true
// concurrent layered_cycle calls are uncommon. A robust fix requires per-request state
// scoping which needs C runtime support (e.g. a request-id-keyed addendum map).
let augmented_addendum: String = safety_augment_system("", raw_input)
state_set("layered_cycle_safety_system_addendum", augmented_addendum)