Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 364ecff391 |
+3
-33
@@ -40,32 +40,7 @@ fn ise_post(content: String) -> Void {
|
||||
let safe3: String = str_replace(safe2, "\n", "\\n")
|
||||
let safe4: String = str_replace(safe3, "\r", "\\r")
|
||||
let body: String = "{\"content\":\"" + safe4 + "\"}"
|
||||
// Soft circuit-breaker: skip HTTP call when engram is known-down (30s backoff).
|
||||
// Opens after 3 consecutive failures; half-open probe after backoff expires.
|
||||
// TODO(reliability): full async dispatch requires EL runtime futures support.
|
||||
let cb_open: String = state_get("engram_cb_open")
|
||||
if str_eq(cb_open, "1") {
|
||||
let cb_ts_s: String = state_get("engram_cb_open_ts")
|
||||
let cb_ts: Int = if str_eq(cb_ts_s, "") { 0 } else { str_to_int(cb_ts_s) }
|
||||
let cb_elapsed: Int = time_now() - cb_ts
|
||||
if cb_elapsed < 30000 { return "" }
|
||||
state_set("engram_cb_open", "0")
|
||||
}
|
||||
let resp: String = http_post_json(engram_url + "/api/neuron/state-events", body)
|
||||
let cb_failed: Bool = str_eq(resp, "") || str_starts_with(resp, "{"error":")
|
||||
if cb_failed {
|
||||
let fn_s: String = state_get("engram_cb_fails")
|
||||
let fn_n: Int = if str_eq(fn_s, "") { 0 } else { str_to_int(fn_s) }
|
||||
let fn_n = fn_n + 1
|
||||
state_set("engram_cb_fails", int_to_str(fn_n))
|
||||
if fn_n >= 3 {
|
||||
state_set("engram_cb_open", "1")
|
||||
state_set("engram_cb_open_ts", int_to_str(time_now()))
|
||||
println("[awareness] engram circuit-breaker OPEN after " + int_to_str(fn_n) + " failures")
|
||||
}
|
||||
} else {
|
||||
state_set("engram_cb_fails", "0")
|
||||
}
|
||||
let discard: String = http_post_json(engram_url + "/api/neuron/state-events", body)
|
||||
return ""
|
||||
}
|
||||
|
||||
@@ -565,14 +540,9 @@ fn awareness_run() -> Void {
|
||||
let should_refresh: Bool = refresh_elapsed >= refresh_ms
|
||||
if should_refresh {
|
||||
let engram_url: String = state_get("soul_engram_url")
|
||||
let sc: String = state_get("engram_cb_open")
|
||||
let sc_ts_s: String = state_get("engram_cb_open_ts")
|
||||
let sc_ts: Int = if str_eq(sc_ts_s, "") { 0 } else { str_to_int(sc_ts_s) }
|
||||
let sc_elapsed: Int = now_ts - sc_ts
|
||||
let sync_allowed: Bool = !str_eq(sc, "1") || sc_elapsed >= 30000
|
||||
if !str_eq(engram_url, "") && sync_allowed {
|
||||
if !str_eq(engram_url, "") {
|
||||
let sync_json: String = http_get(engram_url + "/api/sync")
|
||||
if !str_eq(sync_json, "") && !str_eq(sync_json, "{}") && !str_starts_with(sync_json, "{\"error\":") {
|
||||
if !str_eq(sync_json, "") && !str_eq(sync_json, "{}") {
|
||||
let cgi_id: String = state_get("soul_cgi_id")
|
||||
let tmp: String = "/tmp/soul-sync-" + cgi_id + ".json"
|
||||
fs_write(tmp, sync_json)
|
||||
|
||||
@@ -186,10 +186,6 @@ fn handle_chat(body: String) -> String {
|
||||
let req_model: String = json_get(body, "model")
|
||||
let model: String = if str_eq(req_model, "") { chat_default_model() } else { req_model }
|
||||
|
||||
// ISSUE 9: add safety_augment_system to primary /api/chat path.
|
||||
// handle_chat was the only LLM path missing bell directive injection.
|
||||
let full_system = safety_augment_system(full_system, message)
|
||||
|
||||
let raw_response: String = llm_call_system(model, full_system, message)
|
||||
|
||||
let is_error: Bool = str_starts_with(raw_response, "{\"error\"")
|
||||
|
||||
@@ -0,0 +1,100 @@
|
||||
# Design proposal: searchable, recency-aware conversation memory
|
||||
|
||||
Status: **proposal — for Tim + Will, no code yet**
|
||||
Author: Neuron (Claude Opus 4.8), 2026-06-21
|
||||
Trigger: "Summarize the key themes across my recent conversations" returns nothing useful.
|
||||
|
||||
---
|
||||
|
||||
## TL;DR
|
||||
|
||||
Conversations **are** being persisted — `auto_persist` writes every turn as a
|
||||
timestamped `Conversation`/`Episodic` node. The failure is **retrieval**, not
|
||||
storage. Two gaps:
|
||||
|
||||
1. **No recency-ordered retrieval.** There is no way to ask "give me my last N
|
||||
conversation turns by time." Search is keyword-ranked only.
|
||||
2. **Lexical-only search.** `search_memory` → `engram_search_json` is BM25/lexical.
|
||||
A semantic/thematic query ("themes across recent conversations") doesn't share
|
||||
keywords with the actual topic content, so it misses.
|
||||
|
||||
The model literally tried to express the missing capability in the fake tool call
|
||||
it hallucinated: `"recency_weight": 0.8`, `"sort_by": "recency"`,
|
||||
`node_type: "ConversationTurn"`. It wanted a recency-windowed conversation fetch
|
||||
that doesn't exist.
|
||||
|
||||
## What exists today (verified)
|
||||
|
||||
- `auto_persist(req, resp)` (chat.el): after each turn (agentic or non-agentic), stores
|
||||
`{"q","a","created_at","source":"chat","label":"chat:<ts>"}` as
|
||||
`engram_node_full(... "Conversation" ... "Episodic" ...)`, tags
|
||||
`["Conversation","chat","timestamped"]`.
|
||||
- `conv_history_persist` (chat.el): a **single overwriting** `conv:history`
|
||||
Episodic node holding the rolling JSON history (continuity across restarts) —
|
||||
not per-turn, not individually searchable.
|
||||
- Live engram (founder instance): **5,113 nodes, 59 conversation nodes** — a mix
|
||||
of `chat:<ts>`, several `conv:history` copies, and older `Q:/A:` nodes.
|
||||
- Retrieval surface for the agentic loop: `search_memory`, `recall`,
|
||||
`neuron_search_knowledge`, `neuron_recall` — all **query-keyword** based.
|
||||
None is "most recent N by time," none is embedding/semantic.
|
||||
|
||||
## The gap, precisely
|
||||
|
||||
| User intent | Needs | Have today |
|
||||
|---|---|---|
|
||||
| "summarize my recent conversations" | last-N-by-time fetch | ✗ (keyword only) |
|
||||
| "what did we discuss about X" | semantic match on topic | ~ (lexical only; misses paraphrase) |
|
||||
| "themes across everything" | semantic cluster over corpus | ✗ |
|
||||
|
||||
`auto_persist` was initially thought to fire only on the **non-agentic** path
|
||||
(`handle_chat`). The **agentic** path (`handle_chat_agentic`) persists turns too
|
||||
(see open question 1, resolved below), so this suspected second gap does not exist.
|
||||
|
||||
## Proposal
|
||||
|
||||
Three layers, smallest-first. (1) alone fixes the headline use case.
|
||||
|
||||
### 1. Recency-windowed conversation retrieval (the high-value, low-cost win)
|
||||
A runtime/engram primitive + an agentic tool:
|
||||
|
||||
- **Engram**: `engram_recent_by_type(node_type, limit, since_ts?)` → newest-first
|
||||
by `created_at`. (Conversation nodes already carry `created_at`.)
|
||||
- **Agentic tool**: `recent_conversations(limit=20, since?)` →
|
||||
`[{q,a,created_at}, …]`, newest first. Exposed in `agentic_tools_all`.
|
||||
- **System-prompt hint**: for "recent / lately / this week / summarize our
|
||||
conversations," prefer `recent_conversations` over `search_memory`.
|
||||
|
||||
This directly answers "summarize my recent conversations" — fetch last N, hand
|
||||
the model the actual turns, let it cluster themes. No embeddings required.
|
||||
|
||||
### 2. Stable per-session threading
|
||||
Today each turn is an independent `chat:<ts>` node; there's no session grouping.
|
||||
Add `session_id` + a monotonic turn index to the persisted content (the UI already
|
||||
sends `session_id`). Enables "summarize *this* conversation" and per-session recall,
|
||||
and lets retrieval return coherent threads instead of loose turns.
|
||||
|
||||
### 3. Semantic retrieval (the real fix for thematic queries)
|
||||
Lexical BM25 can't do "themes." Options, in decreasing order of effort:
|
||||
- **a.** Embeddings on Conversation nodes + a vector search tool
|
||||
(`semantic_search`). Biggest lift; also fixes knowledge recall broadly.
|
||||
- **b.** Interim: a two-pass "map-reduce" — `recent_conversations` to pull the
|
||||
window, then let the model cluster. Cheap, ships with (1), no infra.
|
||||
|
||||
Recommend **(1) + (2) now, (3b) as the interim thematic answer, (3a) as the
|
||||
roadmap item** once embeddings land (this dovetails with the GraphRAG/embedding
|
||||
work already noted in memory: substring 1.7% P@5 vs BM25 55% vs graph 21.7%).
|
||||
|
||||
## Open questions for Will
|
||||
1. ~~Does the agentic path persist turns?~~ **Resolved: yes** — the dispatcher
|
||||
calls `auto_persist` after both the agentic and non-agentic branches
|
||||
(`routes.el` lines 156/298). Both paths store per-turn nodes.
|
||||
2. `conv:history` is meant to be a single overwriting node, yet duplicates are accumulating (saw several in the
|
||||
live engram) — intended, or should it truly overwrite/dedupe?
|
||||
3. Is there appetite for the `engram_recent_by_type` primitive in the runtime, or
|
||||
should recency be done in `.el` by scanning + sorting (fine at 59 nodes, weak
|
||||
at scale)?
|
||||
4. Embeddings (3a): on the roadmap timeline, or defer and ship (1)+(2)+(3b)?
|
||||
|
||||
## Not in scope
|
||||
Persistence itself (it works), and the separate **confabulation** fix (model
|
||||
faking tool calls in Just-chat mode) — that's `neuron` PR #29.
|
||||
+4
-8
@@ -24,23 +24,19 @@ ENGRAM_DATA_DIR="$ENGRAM_DATA_DIR" \
|
||||
|
||||
ENGRAM_PID=$!
|
||||
|
||||
# Wait for engram to become healthy (up to 60s; GKE Autopilot cold starts can be slow)
|
||||
# Wait for engram to become healthy (up to 30s)
|
||||
echo "[entrypoint] waiting for engram..."
|
||||
TRIES=0
|
||||
until curl -sf "$ENGRAM_HEALTH_URL" > /dev/null 2>&1; do
|
||||
TRIES=$((TRIES + 1))
|
||||
if [ "$TRIES" -ge 60 ]; then
|
||||
echo "[entrypoint] ERROR: engram did not become healthy after 60s" >&2
|
||||
if [ "$TRIES" -ge 30 ]; then
|
||||
echo "[entrypoint] ERROR: engram did not become healthy after 30s" >&2
|
||||
kill "$ENGRAM_PID" 2>/dev/null || true
|
||||
exit 1
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
echo "[entrypoint] engram ready after ${TRIES}s"
|
||||
|
||||
# Tune EL HTTP runtime: reduce per-call timeout 60s->10s, connect timeout 3s.
|
||||
export EL_HTTP_TIMEOUT_MS="${EL_HTTP_TIMEOUT_MS:-10000}"
|
||||
export EL_HTTP_CONNECT_TIMEOUT_MS="${EL_HTTP_CONNECT_TIMEOUT_MS:-3000}"
|
||||
echo "[entrypoint] engram ready"
|
||||
|
||||
# Start soul — it takes over as PID 1's foreground process.
|
||||
# SOUL_ENGRAM_PATH must NOT be set; ENGRAM_URL triggers HTTP mode.
|
||||
|
||||
@@ -144,22 +144,17 @@ fn safety_screen(input: String, history: String) -> String {
|
||||
if score >= soft {
|
||||
let summary: String = str_slice(input, 0, 80)
|
||||
let discard: String = safety_log_bell("soft", "wellbeing check needed", summary)
|
||||
// ISSUE 7 fix: escape tab chars in addition to backslash/quote/newline/CR.
|
||||
// A tab in user input corrupts the JSON envelope and causes json_get to misparse.
|
||||
let e1: String = str_replace(input, "\\", "\\\\")
|
||||
let e2: String = str_replace(e1, "\"", "\\\"")
|
||||
let e3: String = str_replace(e2, "\n", "\\n")
|
||||
let e4: String = str_replace(e3, "\r", "\\r")
|
||||
let safe_input: String = str_replace(e4, "\t", "\\t")
|
||||
let safe_input: String = str_replace(e3, "\r", "\\r")
|
||||
return "{\"action\":\"soft_bell\",\"reason\":\"wellbeing check needed\",\"content\":\"" + safe_input + "\"}"
|
||||
}
|
||||
|
||||
// ISSUE 7 fix: escape tab chars (see soft_bell branch above for rationale).
|
||||
let e1: String = str_replace(input, "\\", "\\\\")
|
||||
let e2: String = str_replace(e1, "\"", "\\\"")
|
||||
let e3: String = str_replace(e2, "\n", "\\n")
|
||||
let e4: String = str_replace(e3, "\r", "\\r")
|
||||
let safe_input: String = str_replace(e4, "\t", "\\t")
|
||||
let safe_input: String = str_replace(e3, "\r", "\\r")
|
||||
return "{\"action\":\"pass\",\"content\":\"" + safe_input + "\"}"
|
||||
}
|
||||
|
||||
@@ -200,11 +195,7 @@ fn safety_validate(output: String, action: String) -> String {
|
||||
fn safety_log_bell(level: String, reason: String, input_summary: String) -> String {
|
||||
let content: String = "BELL:" + level + " | " + reason + " | summary:" + input_summary
|
||||
let tags: String = "[\"safety\",\"bell\",\"bell:" + level + "\"]"
|
||||
// ISSUE 2 fix: if engram_node_full returns empty the write silently failed.
|
||||
// Emit a fallback println so the bell event leaves at least a log trace even
|
||||
// when engram is degraded. This does not replace engram persistence -- it is a
|
||||
// last-resort audit trail when the primary write cannot be confirmed.
|
||||
let node_id: String = engram_node_full(
|
||||
let discard: String = engram_node_full(
|
||||
content,
|
||||
"BellEvent",
|
||||
"bell:" + level,
|
||||
@@ -214,9 +205,6 @@ fn safety_log_bell(level: String, reason: String, input_summary: String) -> Stri
|
||||
"Episodic",
|
||||
tags
|
||||
)
|
||||
if str_eq(node_id, "") {
|
||||
println("[safety] WARN: bell event engram write failed -- fallback log: " + content)
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
@@ -247,17 +235,6 @@ fn safety_soft_phrases() -> String {
|
||||
return "[\"stressed\",\"overwhelmed\",\"can't cope\",\"cannot cope\",\"struggling\",\"anxious\",\"anxiety\",\"depressed\",\"depression\",\"lonely\",\"isolated\",\"hopeless\",\"hopelessness\",\"exhausted\",\"burnt out\",\"burned out\",\"burnout\",\"panic\",\"panicking\",\"falling apart\",\"breaking down\",\"can't handle\",\"cannot handle\",\"losing it\",\"nothing matters\",\"don't care anymore\",\"given up\",\"giving up\",\"helpless\",\"worthless\",\"useless\",\"hate myself\",\"no one cares\",\"nobody cares\",\"no one understands\",\"nobody understands\",\"empty inside\",\"can't stop crying\",\"breaking point\",\"at my limit\",\"having a breakdown\"]"
|
||||
}
|
||||
|
||||
// ISSUE 5 TODO: phrase lists are rebuilt from JSON literals on every call.
|
||||
// safety_any_match and safety_count_match loop over json_array_get on every invocation.
|
||||
// A compiled/cached representation would reduce per-message overhead and also guard against
|
||||
// malformed phrase JSON (json_array_len of malformed input returns 0, silently skipping all checks).
|
||||
// Caching requires language-level static const arrays -- not available in current EL.
|
||||
// When EL gains module-level const arrays, migrate phrase lists to that form.
|
||||
//
|
||||
// ISSUE 5 TODO: phrase lists are rebuilt from JSON literals on every call to
|
||||
// safety_any_match / safety_count_match. json_array_len of a malformed string
|
||||
// returns 0, silently skipping all checks. Caching requires language-level static
|
||||
// const arrays (not available in current EL). Migrate when EL gains that feature.
|
||||
// ── Matching helpers (single loops only — el escapes while-body mutation via
|
||||
// top-level let rebinds; nested loops would not advance) ────────────────────
|
||||
|
||||
|
||||
@@ -5,9 +5,13 @@ import "stewardship.el"
|
||||
import "imprint.el"
|
||||
import "awareness.el"
|
||||
import "chat.el"
|
||||
import "safety.el"
|
||||
import "studio.el"
|
||||
import "elp-input.el"
|
||||
import "routes.el"
|
||||
import "safety.el"
|
||||
import "stewardship.el"
|
||||
import "imprint.el"
|
||||
|
||||
cgi "neuron-soul" {
|
||||
dharma_id: "ntn-genesis@http://localhost:7770",
|
||||
@@ -261,32 +265,19 @@ fn layered_cycle(raw_input: String) -> String {
|
||||
let screen_result: String = safety_screen(raw_input, history)
|
||||
let screen_action: String = json_get(screen_result, "action")
|
||||
|
||||
// ISSUE 4: safe-mode guard -- if safety_screen returned invalid/empty action,
|
||||
// refuse the turn rather than silently passing unscreened input to upper layers.
|
||||
// Valid actions: "hard_bell", "soft_bell", "pass". Anything else = corrupt envelope.
|
||||
let valid_action: Bool = str_eq(screen_action, "hard_bell")
|
||||
|| str_eq(screen_action, "soft_bell")
|
||||
|| str_eq(screen_action, "pass")
|
||||
if !valid_action {
|
||||
println("[soul] layered_cycle: safety_screen invalid action -- safe mode refusal")
|
||||
return safety_validate("", "hard_bell")
|
||||
}
|
||||
|
||||
// Hard bell: bypass all upper layers, log and escalate.
|
||||
// Intentionally does NOT update conversation_history or call auto_persist():
|
||||
// hard bell events are security-sensitive and must not appear in engram conversation
|
||||
// history where they could leak context to subsequent turns. They are persisted
|
||||
// separately by safety_log_bell() into the Episodic tier with restricted labels.
|
||||
//
|
||||
// ISSUE 6: safety_log_bell for hard bells is already called INSIDE safety_screen
|
||||
// (safety.el line 140). Do NOT call it again here -- double-log avoided.
|
||||
//
|
||||
// safety_validate second param: when screen_action is "hard_bell", safety_validate
|
||||
// receives the sentinel string "hard_bell" (not a normal screen action). The safety
|
||||
// layer contract requires it to return a fixed refusal regardless of the output arg.
|
||||
// On the normal path, safety_validate receives the original screen_action ("pass")
|
||||
// so it can apply action-specific post-output checks.
|
||||
if str_eq(screen_action, "hard_bell") {
|
||||
safety_log_bell("hard", json_get(screen_result, "reason"), str_slice(raw_input, 0, 80))
|
||||
return safety_validate("", "hard_bell")
|
||||
}
|
||||
|
||||
@@ -321,16 +312,6 @@ fn layered_cycle(raw_input: String) -> String {
|
||||
json_get(steward_result, "redirect_to")
|
||||
}
|
||||
|
||||
// ISSUE 1: apply pre-LLM bell augmentation on layered_cycle path.
|
||||
// safety_augment_system injects soft/hard directive into system prompt before LLM call.
|
||||
// Stored in state so imprint_respond can consume it.
|
||||
// TODO: wire directly into imprint_respond when it accepts a system_override param.
|
||||
// ISSUE 3 TODO: no semantic/embedding crisis detection. Keyword-only means signals
|
||||
// evading the phrase list pass through with zero augmentation. Semantic layer is a
|
||||
// separate architectural decision requiring embedding inference on every message.
|
||||
let augmented_addendum: String = safety_augment_system("", raw_input)
|
||||
state_set("layered_cycle_safety_system_addendum", augmented_addendum)
|
||||
|
||||
// L3: imprint responds
|
||||
let output: String = imprint_respond(aligned, imprint_id)
|
||||
|
||||
@@ -370,29 +351,12 @@ let snapshot_usable: Bool = local_node_count > 50
|
||||
|
||||
if using_http_engram && !snapshot_usable {
|
||||
// First boot or empty/corrupt snapshot: seed from HTTP Engram.
|
||||
// Retry up to 3 times (2s sleep between attempts) to guard against a
|
||||
// transient network hiccup right after entrypoint.sh health check passes.
|
||||
// An empty nodes response silently loads a zero-node graph; validate first.
|
||||
// TODO(reliability): replace sleep_ms retry with non-blocking backoff.
|
||||
println("[soul] engram -> HTTP " + engram_url_raw + " (no local snapshot, first boot)")
|
||||
let fetch_attempt: Int = 0
|
||||
while fetch_attempt < 3 {
|
||||
let fetch_attempt = fetch_attempt + 1
|
||||
let n: String = http_get(engram_url_raw + "/api/nodes?limit=10000")
|
||||
let e: String = http_get(engram_url_raw + "/api/edges")
|
||||
let nodes_ok: Bool = !str_eq(n, "") && str_starts_with(n, "[") && str_len(n) > 2
|
||||
if nodes_ok {
|
||||
state_set("_boot_nodes_json", n)
|
||||
state_set("_boot_edges_json", e)
|
||||
let fetch_attempt = 3
|
||||
} else {
|
||||
println("[soul] boot HTTP fetch attempt " + int_to_str(fetch_attempt) + " failed --- retrying in 2s")
|
||||
sleep_ms(2000)
|
||||
}
|
||||
}
|
||||
let nodes_json: String = state_get("_boot_nodes_json")
|
||||
let edges_json: String = state_get("_boot_edges_json")
|
||||
let snapshot_data: String = "{\"nodes\":" + nodes_part + ",\"edges\":" + edges_part + "}"
|
||||
let nodes_json: String = http_get(engram_url_raw + "/api/nodes?limit=10000")
|
||||
let edges_json: String = http_get(engram_url_raw + "/api/edges")
|
||||
let nodes_part: String = if str_eq(nodes_json, "") { "[]" } else { nodes_json }
|
||||
let edges_part: String = if str_eq(edges_json, "") { "[]" } else { edges_json }
|
||||
let snapshot_data: String = "{\"nodes\":" + nodes_part + ",\"edges\":" + edges_part + "}"
|
||||
let tmp_path: String = "/tmp/soul-engram-" + soul_cgi_id + ".json"
|
||||
fs_write(tmp_path, snapshot_data)
|
||||
engram_load(tmp_path)
|
||||
|
||||
Reference in New Issue
Block a user