Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 0c5b966773 | |||
| b2008f4894 |
@@ -40,9 +40,43 @@ fn engram_compile(intent: String) -> String {
|
||||
""
|
||||
}
|
||||
|
||||
// Affective context: always include the most recent high-emotion memory if one
|
||||
// exists within 72 hours. This ensures continuity of care across turns — when
|
||||
// the user was in distress earlier in the session (or recently), that context
|
||||
// travels into every subsequent LLM call so the response register stays aware.
|
||||
// We search for BellEvent nodes specifically; these are written by auto_persist
|
||||
// when safety_detect_bell_level fires. The 72h window (259200 seconds) is wide
|
||||
// enough to span a multi-session day without pulling ancient history.
|
||||
let bell_nodes: String = engram_search_json("bell:soft bell:hard BellEvent", 3)
|
||||
let bell_ok: Bool = !str_eq(bell_nodes, "") && !str_eq(bell_nodes, "[]")
|
||||
let now_ts: Int = time_now()
|
||||
let cutoff_ts: Int = now_ts - 259200
|
||||
let recent_bell: String = if bell_ok {
|
||||
let bn0: String = json_array_get(bell_nodes, 0)
|
||||
// created_at is not present in engram node JSON for BellEvent nodes.
|
||||
// Extract the timestamp embedded in the content string as " | ts:NNNNN".
|
||||
// Fall back to created_at / updated_at JSON fields if the marker is absent.
|
||||
let bn_content: String = json_get(bn0, "content")
|
||||
let ts_marker: String = " | ts:"
|
||||
let ts_pos: Int = str_index_of(bn_content, ts_marker)
|
||||
let bn_ts_raw: String = if ts_pos >= 0 {
|
||||
let ts_start: Int = ts_pos + str_len(ts_marker)
|
||||
let rest: String = str_slice(bn_content, ts_start, str_len(bn_content))
|
||||
let next_sep: Int = str_index_of(rest, " | ")
|
||||
if next_sep < 0 { rest } else { str_slice(rest, 0, next_sep) }
|
||||
} else {
|
||||
let ca: String = json_get(bn0, "created_at")
|
||||
if str_eq(ca, "") { json_get(bn0, "updated_at") } else { ca }
|
||||
}
|
||||
let bn_ts: Int = if str_eq(bn_ts_raw, "") { 0 } else { str_to_int(bn_ts_raw) }
|
||||
if bn_ts > cutoff_ts { bn0 } else { "" }
|
||||
} else { "" }
|
||||
let affective_part: String = if !str_eq(recent_bell, "") { recent_bell } else { "" }
|
||||
|
||||
let sep1: String = if !str_eq(act_part, "") && !str_eq(srch_part, "") { "\n" } else { "" }
|
||||
let sep2: String = if (!str_eq(act_part, "") || !str_eq(srch_part, "")) && !str_eq(scan_part, "") { "\n" } else { "" }
|
||||
let ctx: String = act_part + sep1 + srch_part + sep2 + scan_part
|
||||
let sep3: String = if (!str_eq(act_part, "") || !str_eq(srch_part, "") || !str_eq(scan_part, "")) && !str_eq(affective_part, "") { "\n" } else { "" }
|
||||
let ctx: String = act_part + sep1 + srch_part + sep2 + scan_part + sep3 + affective_part
|
||||
|
||||
if str_eq(ctx, "") { return "" }
|
||||
|
||||
@@ -108,6 +142,69 @@ fn hist_trim(hist: String) -> String {
|
||||
return hist
|
||||
}
|
||||
|
||||
// hist_trim_with_bell_guard — drop the oldest user/assistant pair (the first two
// entries) from the JSON history array, but before dropping it check whether the
// evicted user turn triggered a bell event. If it did, write a preservation node to
// engram so the distress exchange outlives the in-memory window.
//
// Parameters:
//   hist — the history as a JSON array string of {"role":...,"content":...} entries.
//          NOTE(review): entry boundaries are located by searching for the literal
//          text {"role": so this assumes every entry starts with that key and that
//          entries are separated by a single "," — confirm against hist_append.
//
// Returns:
//   The history with its first two entries removed, or hist unchanged when it holds
//   fewer than three entries (nothing can be trimmed).
//
// Side effects:
//   At most one engram_node_full write, and only when a pair is actually evicted and
//   the evicted first entry is a user turn whose bell level is not "none".
//
// The preserved node carries a " | ts:" field because engram_compile dates BellEvent
// nodes from that marker; without it the node's timestamp resolves to 0 and the node
// can never pass the recency cutoff.
fn hist_trim_with_bell_guard(hist: String) -> String {
    // Anything shorter than "[]" has no brackets to strip; slicing it would use
    // negative offsets.
    if str_len(hist) < 2 {
        return hist
    }

    let marker: String = "{\"role\":"

    // Strip the enclosing "[" and "]" so offsets below are relative to the entries.
    let inner: String = str_slice(hist, 1, str_len(hist) - 1)

    // i1: start of the first entry within inner.
    let i1: Int = str_index_of(inner, marker)
    if i1 < 0 {
        // No entries at all — nothing to inspect, nothing to trim.
        return hist
    }

    // Search for the second entry starting one character past the first marker so
    // the first marker cannot match itself.
    let tail1: String = str_slice(inner, i1 + 1, str_len(inner))
    let i2: Int = str_index_of(tail1, marker)
    if i2 < 0 {
        // Only one entry: the trim cannot drop a pair, so nothing is evicted.
        return hist
    }

    // Same technique to find the third entry, which becomes the new head.
    let tail2: String = str_slice(tail1, i2 + 1, str_len(tail1))
    let i3: Int = str_index_of(tail2, marker)
    if i3 < 0 {
        // Exactly two entries: history is returned untouched, so do not record an
        // "evicted" node for a turn that is still in the window.
        return hist
    }

    // The second marker sits at offset i1 + 1 + i2 in inner; stopping one character
    // earlier excludes the "," separator that follows the first entry.
    let first_entry_raw: String = str_slice(inner, i1, i1 + i2)
    let first_role: String = json_get(first_entry_raw, "role")
    let first_content: String = json_get(first_entry_raw, "content")

    // Only inspect user turns — assistant content doesn't carry bell signals.
    let bell_level: String = if str_eq(first_role, "user") {
        safety_detect_bell_level(first_content)
    } else {
        "none"
    }

    // If the turn being evicted triggered a bell, preserve it to engram.
    // This is distinct from the BellEvent written by auto_persist: that node
    // carries a short summary. This node carries the full user message so
    // it is recoverable for clinical/continuity review.
    if !str_eq(bell_level, "none") {
        let ts: Int = time_now()
        let ts_str: String = int_to_str(ts)
        // Double quotes are swapped for single quotes, matching how auto_persist
        // sanitises message text before storing it.
        let safe_content: String = str_replace(first_content, "\"", "'")
        // " | ts:" is placed before the free-text message so a user message that
        // happens to contain " | ts:" cannot be mistaken for the timestamp field.
        // The value is the eviction time: the history entry exposes only role and
        // content here, so the original turn time is not available.
        let preserve_content: String = "PRESERVED_BELL:" + bell_level
            + " | ts:" + ts_str
            + " | evicted_at:" + ts_str
            + " | message:" + safe_content
        let preserve_tags: String = "[\"bell-history\",\"bell:" + bell_level + "\",\"evicted\",\"affective\",\"BellEvent\"]"
        // The returned value is not used; it is bound only to discard it.
        let discard: String = engram_node_full(
            preserve_content,
            "BellEvent",
            "bell:" + bell_level + ":preserved",
            el_from_float(0.9),
            el_from_float(0.9),
            el_from_float(1.0),
            "Episodic",
            preserve_tags
        )
    }

    // Standard trim: keep everything from the third entry onward and re-wrap it
    // in array brackets (drops oldest 2 entries = 1 user + 1 assistant pair).
    return "[" + str_slice(tail2, i3, str_len(tail2)) + "]"
}
|
||||
|
||||
// clean_llm_response — strips GPT-2 BPE byte-to-unicode artifacts that vLLM
|
||||
// emits when the tokenizer hasn't decoded back to raw bytes.
|
||||
//
|
||||
@@ -200,8 +297,10 @@ fn handle_chat(body: String) -> String {
|
||||
|
||||
let updated_hist: String = hist_append(stored_hist, "user", message)
|
||||
let updated_hist2: String = hist_append(updated_hist, "assistant", raw_response)
|
||||
// Use bell-guarded trim: if the evicted turn triggered a bell event, it is
|
||||
// preserved to engram before being dropped from the in-memory window.
|
||||
let final_hist: String = if json_array_len(updated_hist2) > 20 {
|
||||
hist_trim(updated_hist2)
|
||||
hist_trim_with_bell_guard(updated_hist2)
|
||||
} else {
|
||||
updated_hist2
|
||||
}
|
||||
@@ -1135,14 +1234,28 @@ fn auto_persist(req: String, resp: String) -> Void {
|
||||
let safe_msg: String = str_replace(message, "\"", "'")
|
||||
let safe_reply: String = str_replace(reply2, "\"", "'")
|
||||
|
||||
// Detect emotional salience before persisting. safety_detect_bell_level uses the
|
||||
// same phrase lists as the safety layer (safety.el), so the classification is
|
||||
// consistent with what safety_screen already evaluated for this turn.
|
||||
let bell_level: String = safety_detect_bell_level(message)
|
||||
let is_bell: Bool = !str_eq(bell_level, "none")
|
||||
|
||||
// Tag the Conversation node with bell metadata when distress is present so
|
||||
// subsequent affective queries (e.g. engram_compile) can find this exchange.
|
||||
let tags: String = if is_bell {
|
||||
"[\"Conversation\",\"chat\",\"timestamped\",\"bell:" + bell_level + "\",\"affective\"]"
|
||||
} else {
|
||||
"[\"Conversation\",\"chat\",\"timestamped\"]"
|
||||
}
|
||||
|
||||
let content: String = "{\"q\":\"" + safe_msg + "\""
|
||||
+ ",\"a\":\"" + safe_reply + "\""
|
||||
+ ",\"created_at\":" + ts_str
|
||||
+ ",\"source\":\"chat\""
|
||||
+ ",\"bell\":\"" + bell_level + "\""
|
||||
+ ",\"label\":\"chat:" + ts_str + "\"}"
|
||||
|
||||
let tags: String = "[\"Conversation\",\"chat\",\"timestamped\"]"
|
||||
engram_node_full(
|
||||
let conv_node_id: String = engram_node_full(
|
||||
content,
|
||||
"Conversation",
|
||||
"chat:" + ts_str,
|
||||
@@ -1152,6 +1265,72 @@ fn auto_persist(req: String, resp: String) -> Void {
|
||||
"Episodic",
|
||||
tags
|
||||
)
|
||||
|
||||
// When a bell fires, write a dedicated BellEvent node in addition to the
|
||||
// Conversation node. This makes distress moments directly findable by label
|
||||
// ("bell:soft" / "bell:hard") without having to scan all Conversation nodes.
|
||||
// The BellEvent carries higher salience so engram_compile pulls it into context.
|
||||
// The message content is truncated to 120 chars — enough signal, not a full dump.
|
||||
if is_bell {
|
||||
let summary: String = if str_len(message) > 120 { str_slice(message, 0, 120) } else { message }
|
||||
let safe_summary: String = str_replace(summary, "\"", "'")
|
||||
let bell_content: String = "BELL:" + bell_level
|
||||
+ " | ts:" + ts_str
|
||||
+ " | summary:" + safe_summary
|
||||
|
||||
// bell:hard gets peak salience; bell:soft is slightly lower.
|
||||
let sal_a: String = if str_eq(bell_level, "hard") { el_from_float(0.98) } else { el_from_float(0.88) }
|
||||
let sal_b: String = if str_eq(bell_level, "hard") { el_from_float(0.98) } else { el_from_float(0.88) }
|
||||
let sal_c: String = if str_eq(bell_level, "hard") { el_from_float(1.0) } else { el_from_float(0.95) }
|
||||
|
||||
let bell_tags: String = "[\"safety\",\"bell\",\"bell:" + bell_level + "\",\"affective\",\"BellEvent\"]"
|
||||
let bell_ts_str: String = int_to_str(time_now())
|
||||
let bell_label: String = "bell:" + bell_level + ":" + bell_ts_str
|
||||
let bell_node_id: String = engram_node_full(
|
||||
bell_content,
|
||||
"BellEvent",
|
||||
bell_label,
|
||||
sal_a,
|
||||
sal_b,
|
||||
sal_c,
|
||||
"Episodic",
|
||||
bell_tags
|
||||
)
|
||||
|
||||
// Increment session-level bell counter so session_hist_save knows whether
|
||||
// any bell fired during this session when writing a boundary summary.
|
||||
let sess_id: String = json_get(req, "session_id")
|
||||
let bell_key: String = if str_eq(sess_id, "") {
|
||||
"session_bell_count"
|
||||
} else {
|
||||
"session_bell_count:" + sess_id
|
||||
}
|
||||
let prior_count: String = state_get(bell_key)
|
||||
let prior_n: Int = if str_eq(prior_count, "") { 0 } else { str_to_int(prior_count) }
|
||||
state_set(bell_key, int_to_str(prior_n + 1))
|
||||
|
||||
// Also record the highest bell level seen this session so the boundary
|
||||
// summary can classify the session correctly (hard takes precedence).
|
||||
let level_key: String = if str_eq(sess_id, "") {
|
||||
"session_bell_level"
|
||||
} else {
|
||||
"session_bell_level:" + sess_id
|
||||
}
|
||||
let prior_level: String = state_get(level_key)
|
||||
let new_level: String = if str_eq(bell_level, "hard") { "hard" } else {
|
||||
if str_eq(prior_level, "hard") { "hard" } else { "soft" }
|
||||
}
|
||||
state_set(level_key, new_level)
|
||||
|
||||
// Stash a short signal summary for the boundary node (last bell wins for
|
||||
// the one-liner; the full history is in per-bell BellEvent nodes).
|
||||
let signal_key: String = if str_eq(sess_id, "") {
|
||||
"session_bell_signal"
|
||||
} else {
|
||||
"session_bell_signal:" + sess_id
|
||||
}
|
||||
state_set(signal_key, safe_summary)
|
||||
}
|
||||
}
|
||||
|
||||
// strengthen_chat_nodes — strengthen the engram nodes that were activated during a chat.
|
||||
|
||||
@@ -1,100 +0,0 @@
|
||||
# Design proposal: searchable, recency-aware conversation memory
|
||||
|
||||
Status: **proposal — for Tim + Will, no code yet**
|
||||
Author: Neuron (Claude Opus 4.8), 2026-06-21
|
||||
Trigger: "Summarize the key themes across my recent conversations" returns nothing useful.
|
||||
|
||||
---
|
||||
|
||||
## TL;DR
|
||||
|
||||
Conversations **are** being persisted — `auto_persist` writes every turn as a
|
||||
timestamped `Conversation`/`Episodic` node. The failure is **retrieval**, not
|
||||
storage. Two gaps:
|
||||
|
||||
1. **No recency-ordered retrieval.** There is no way to ask "give me my last N
|
||||
conversation turns by time." Search is keyword-ranked only.
|
||||
2. **Lexical-only search.** `search_memory` → `engram_search_json` is BM25/lexical.
|
||||
A semantic/thematic query ("themes across recent conversations") doesn't share
|
||||
keywords with the actual topic content, so it misses.
|
||||
|
||||
The model literally tried to express the missing capability in the fake tool call
|
||||
it hallucinated: `"recency_weight": 0.8`, `"sort_by": "recency"`,
|
||||
`node_type: "ConversationTurn"`. It wanted a recency-windowed conversation fetch
|
||||
that doesn't exist.
|
||||
|
||||
## What exists today (verified)
|
||||
|
||||
- `auto_persist(req, resp)` (chat.el): after each non-agentic turn, stores
|
||||
`{"q","a","created_at","source":"chat","label":"chat:<ts>"}` as
|
||||
`engram_node_full(... "Conversation" ... "Episodic" ...)`, tags
|
||||
`["Conversation","chat","timestamped"]`.
|
||||
- `conv_history_persist` (chat.el): a **single overwriting** `conv:history`
|
||||
Episodic node holding the rolling JSON history (continuity across restarts) —
|
||||
not per-turn, not individually searchable.
|
||||
- Live engram (founder instance): **5,113 nodes, 59 conversation nodes** — a mix
|
||||
of `chat:<ts>`, several `conv:history` copies, and older `Q:/A:` nodes.
|
||||
- Retrieval surface for the agentic loop: `search_memory`, `recall`,
|
||||
`neuron_search_knowledge`, `neuron_recall` — all **query-keyword** based.
|
||||
None is "most recent N by time," none is embedding/semantic.
|
||||
|
||||
## The gap, precisely
|
||||
|
||||
| User intent | Needs | Have today |
|
||||
|---|---|---|
|
||||
| "summarize my recent conversations" | last-N-by-time fetch | ✗ (keyword only) |
|
||||
| "what did we discuss about X" | semantic match on topic | ~ (lexical only; misses paraphrase) |
|
||||
| "themes across everything" | semantic cluster over corpus | ✗ |
|
||||
|
||||
`auto_persist` only fires on the **non-agentic** path (`handle_chat`). Worth
|
||||
confirming the **agentic** path (`handle_chat_agentic`) persists turns too — if
|
||||
not, agentic conversations never get stored, a second (smaller) gap.
|
||||
|
||||
## Proposal
|
||||
|
||||
Three layers, smallest-first. (1) alone fixes the headline use case.
|
||||
|
||||
### 1. Recency-windowed conversation retrieval (the high-value, low-cost win)
|
||||
A runtime/engram primitive + an agentic tool:
|
||||
|
||||
- **Engram**: `engram_recent_by_type(node_type, limit, since_ts?)` → newest-first
|
||||
by `created_at`. (Conversation nodes already carry `created_at`.)
|
||||
- **Agentic tool**: `recent_conversations(limit=20, since?)` →
|
||||
`[{q,a,created_at}, …]`, newest first. Exposed in `agentic_tools_all`.
|
||||
- **System-prompt hint**: for "recent / lately / this week / summarize our
|
||||
conversations," prefer `recent_conversations` over `search_memory`.
|
||||
|
||||
This directly answers "summarize my recent conversations" — fetch last N, hand
|
||||
the model the actual turns, let it cluster themes. No embeddings required.
|
||||
|
||||
### 2. Stable per-session threading
|
||||
Today each turn is an independent `chat:<ts>` node; there's no session grouping.
|
||||
Add `session_id` + a monotonic turn index to the persisted content (the UI already
|
||||
sends `session_id`). Enables "summarize *this* conversation" and per-session recall,
|
||||
and lets retrieval return coherent threads instead of loose turns.
|
||||
|
||||
### 3. Semantic retrieval (the real fix for thematic queries)
|
||||
Lexical BM25 can't do "themes." Options, highest effort first:
|
||||
- **a.** Embeddings on Conversation nodes + a vector search tool
|
||||
(`semantic_search`). Biggest lift; also fixes knowledge recall broadly.
|
||||
- **b.** Interim: a two-pass "map-reduce" — `recent_conversations` to pull the
|
||||
window, then let the model cluster. Cheap, ships with (1), no infra.
|
||||
|
||||
Recommend **(1) + (2) now, (3b) as the interim thematic answer, (3a) as the
|
||||
roadmap item** once embeddings land (this dovetails with the GraphRAG/embedding
|
||||
work already noted in memory: substring 1.7% P@5 vs BM25 55% vs graph 21.7%).
|
||||
|
||||
## Open questions for Will
|
||||
1. ~~Does the agentic path persist turns?~~ **Resolved: yes** — the dispatcher
|
||||
calls `auto_persist` after both the agentic and non-agentic branches
|
||||
(`routes.el` lines 156/298). Both paths store per-turn nodes.
|
||||
2. `conv:history` is meant to be a single overwriting node, yet duplicates are accumulating (saw several in the
|
||||
live engram) — intended, or should it truly overwrite/dedupe?
|
||||
3. Is there appetite for the `engram_recent_by_type` primitive in the runtime, or
|
||||
should recency be done in `.el` by scanning + sorting (fine at 59 nodes, weak
|
||||
at scale)?
|
||||
4. Embeddings (3a): on the roadmap timeline, or defer and ship (1)+(2)+(3b)?
|
||||
|
||||
## Not in scope
|
||||
Persistence itself (it works), and the separate **confabulation** fix (model
|
||||
faking tool calls in Just-chat mode) — that's `neuron` PR #29.
|
||||
+42
@@ -368,6 +368,48 @@ fn session_hist_save(session_id: String, hist: String) -> Void {
|
||||
el_from_float(0.6), el_from_float(0.6), el_from_float(0.9),
|
||||
"Episodic", tags
|
||||
)
|
||||
|
||||
// Session boundary emotional summary — written once per session the first time
|
||||
// a bell event has fired. The summary node is findable by future sessions via
|
||||
// broad affective queries ("session:emotional-summary" or "bell distress session").
|
||||
// It is NOT rewritten on every save — the state flag prevents duplicate nodes.
|
||||
let summary_written_key: String = "session_bell_summary_written:" + session_id
|
||||
let already_written: String = state_get(summary_written_key)
|
||||
if str_eq(already_written, "") {
|
||||
let bell_count_key: String = "session_bell_count:" + session_id
|
||||
let bell_count_raw: String = state_get(bell_count_key)
|
||||
let bell_count: Int = if str_eq(bell_count_raw, "") { 0 } else { str_to_int(bell_count_raw) }
|
||||
if bell_count > 0 {
|
||||
let bell_level_key: String = "session_bell_level:" + session_id
|
||||
let bell_signal_key: String = "session_bell_signal:" + session_id
|
||||
let dominant_level: String = state_get(bell_level_key)
|
||||
let last_signal: String = state_get(bell_signal_key)
|
||||
let eff_level: String = if str_eq(dominant_level, "") { "soft" } else { dominant_level }
|
||||
let eff_signal: String = if str_eq(last_signal, "") { "(no signal captured)" } else { last_signal }
|
||||
let ts_now: Int = time_now()
|
||||
let summary_content: String = "session:emotional-summary"
|
||||
+ " | session:" + session_id
|
||||
+ " | bell_count:" + int_to_str(bell_count)
|
||||
+ " | dominant_level:" + eff_level
|
||||
+ " | last_signal:" + eff_signal
|
||||
+ " | ts:" + int_to_str(ts_now)
|
||||
let summary_tags: String = "[\"session-emotional-summary\",\"affective\",\"bell:" + eff_level + "\",\"BellEvent\"]"
|
||||
let summary_sal: String = if str_eq(eff_level, "hard") { el_from_float(0.95) } else { el_from_float(0.85) }
|
||||
let sum_discard: String = engram_node_full(
|
||||
summary_content,
|
||||
"BellEvent",
|
||||
"session:emotional-summary",
|
||||
summary_sal,
|
||||
summary_sal,
|
||||
el_from_float(1.0),
|
||||
"Episodic",
|
||||
summary_tags
|
||||
)
|
||||
// Mark written so we do not create duplicate summary nodes as the
|
||||
// session continues accumulating more turns.
|
||||
state_set(summary_written_key, "1")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// session_update_meta_timestamp — update the updated_at field in the session:meta node.
|
||||
|
||||
Reference in New Issue
Block a user