feat(recall): activation-seed improvements
- Issue 2: replace raw 50-char threshold with is_genuine_continuation() that checks for explicit follow-up phrases and mid-sentence capitalization (proper nouns signal a new topic, not a continuation)
- Issue 3/8: build_activation_seed() scans back to find the prior USER turn as the topic anchor instead of using the last assistant reply (hist_len-1)
- Issue 4: engram_compile_multi() fans out across three seeds — enriched primary, raw message (entity queries), and emotion query — merging non-redundant results
- Issue 5: agent workspace_root appended to ag_seed so agentic activation is workspace-aware; previously ignored despite being available in state
- Issue 6: distill_transcript() extracts salient tail+question content from full transcripts before passing to engram_compile in dharma room handlers
- Issue 7: dist/soul-with-nlg.el handle_chat and handle_chat_agentic now load history and use build_activation_seed() — the raw message path is eliminated
- Issue 9: topic_snip_from_entry() takes the TAIL 200 chars of a long reply and finds the last sentence boundary — captures end-of-reply named concepts
- Issue 10: multi_turn_topic() pulls up to 3 prior user turns into the non-continuation seed so earlier thread context re-activates high-salience nodes
This commit is contained in:
@@ -116,6 +116,240 @@ fn engram_compile_ranked(nodes_json: String, max_nodes: Int) -> String {
|
||||
return c9
|
||||
}
|
||||
|
||||
// is_followup_phrase — reports whether msg reads as an explicit follow-up to the
// previous exchange, so recall should be anchored on the earlier topic instead of
// treating msg as a standalone query. Called by is_genuine_continuation.
//
// Two kinds of match count:
//   - msg contains one of the follow-up phrases anywhere in its text (substring
//     match, so "continue" also matches inside a longer word);
//   - msg is exactly one of the bare acknowledgements "ok", "yes", "yeah",
//     "and?" or "so?".
//
// NOTE(review): every literal is lowercase and str_contains/str_eq are presumably
// case-sensitive, so "OK" or "Tell me more" would not match — confirm.
fn is_followup_phrase(msg: String) -> Bool {
    // Requests to expand on what was just said.
    let asks_expansion: Bool = str_contains(msg, "tell me more") || str_contains(msg, "elaborate") || str_contains(msg, "more detail") || str_contains(msg, "say more")

    // Requests to simply carry on.
    let asks_to_proceed: Bool = str_contains(msg, "go on") || str_contains(msg, "keep going") || str_contains(msg, "continue")

    // References that only make sense relative to the previous reply.
    let points_back: Bool = str_contains(msg, "what about that") || str_contains(msg, "what else") || str_contains(msg, "last part")

    // Whole-message acknowledgements; these must match exactly, not as substrings.
    let bare_ack: Bool = str_eq(msg, "ok") || str_eq(msg, "yes") || str_eq(msg, "yeah") || str_eq(msg, "and?") || str_eq(msg, "so?")

    return asks_expansion || asks_to_proceed || points_back || bare_ack
}
|
||||
|
||||
// has_mid_sentence_capital — true when msg contains a space (at index 1 or later)
// that is immediately followed by an ASCII capital letter A-Z, i.e. a capitalised
// word that is not the first word of the message.
//
// The first-person pronoun "I" is not counted: a capital "I" only qualifies when
// the character after it is a letter (as in "India" or "IBM"). A standalone "I",
// or "I" followed by an apostrophe or punctuation ("I'm", "I?"), is ignored so
// that everyday follow-ups such as "what should I do?" are not mistaken for a
// new topic.
fn has_mid_sentence_capital(msg: String) -> Bool {
    let upper: String = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    let letters: String = upper + "abcdefghijklmnopqrstuvwxyz"
    let mlen: Int = str_len(msg)
    let found: Bool = false
    // Start at 1: a space at index 0 is not "mid-sentence".
    let i: Int = 1
    while i < mlen - 1 && !found {
        let sp: String = str_slice(msg, i, i + 1)
        let ch: String = str_slice(msg, i + 1, i + 2)
        let starts_capital_word: Bool = str_eq(sp, " ") && str_contains(upper, ch)
        // Character after the capital, or "" when the capital is the last character.
        let nxt: String = if i + 2 < mlen { str_slice(msg, i + 2, i + 3) } else { "" }
        let nxt_is_letter: Bool = !str_eq(nxt, "") && str_contains(letters, nxt)
        let is_pronoun_i: Bool = str_eq(ch, "I") && !nxt_is_letter
        let found = starts_capital_word && !is_pronoun_i
        let i = i + 1
    }
    return found
}

// is_genuine_continuation — decides whether msg should be treated as a contextual
// follow-up to the existing thread rather than as a new topic. Replaces the old
// bare threshold (str_len < 50 && hist_len > 0).
//
// Rules, applied in order:
//   1. No history, or an empty message        -> false.
//   2. msg matches is_followup_phrase          -> true. This is checked BEFORE the
//      length limit, so a message of any length containing a follow-up phrase
//      counts as a continuation.
//   3. msg is 50 characters or longer          -> false.
//   4. msg has a capitalised word after its first word (see
//      has_mid_sentence_capital; the pronoun "I" is excluded) -> false, on the
//      assumption that a proper noun introduces a new topic.
//   5. Otherwise (short, no mid-sentence capital) -> true.
//
// Known limitation: a short, all-lowercase new-topic message such as
// "explain quantum tunneling" reaches rule 5 and is still classified as a
// continuation.
fn is_genuine_continuation(msg: String, hist_len: Int) -> Bool {
    if hist_len == 0 { return false }
    if str_len(msg) == 0 { return false }
    if is_followup_phrase(msg) { return true }
    if str_len(msg) >= 50 { return false }
    if has_mid_sentence_capital(msg) { return false }
    return true
}
|
||||
|
||||
// topic_snip_from_entry — extract a short, tail-biased snippet from the content of
// a history entry, so that concepts named near the END of a long reply are the
// ones carried into the activation seed.
//
// - content of 200 characters or fewer is returned unchanged.
// - Otherwise only the last 200 characters are considered. Within that tail the
//   last sentence boundary (". " or ".\n") is located and the text AFTER it is
//   kept; if there is no boundary the whole tail is kept.
// - The result is capped at its first 150 characters.
//
// A boundary formed by the final two characters of the tail is ignored: nothing
// follows it, so honouring it would return an empty snippet for any reply that
// ends in ". " or ".\n". In that case the previous boundary (or the whole tail)
// is used instead.
fn topic_snip_from_entry(content: String) -> String {
    let clen: Int = str_len(content)
    if clen <= 200 { return content }

    let tail: String = str_slice(content, clen - 200, clen)
    let tail_len: Int = str_len(tail)

    let last_boundary: Int = -1
    let si: Int = 0
    // Stop at tail_len - 2 so every accepted boundary has at least one character after it.
    while si < tail_len - 2 {
        let ch2: String = str_slice(tail, si, si + 2)
        let is_boundary: Bool = str_eq(ch2, ". ") || str_eq(ch2, ".\n")
        let last_boundary = if is_boundary { si } else { last_boundary }
        let si = si + 1
    }

    // Skip the two boundary characters themselves.
    let clean_tail: String = if last_boundary >= 0 {
        str_slice(tail, last_boundary + 2, tail_len)
    } else { tail }

    if str_len(clean_tail) > 150 { return str_slice(clean_tail, 0, 150) }
    return clean_tail
}
|
||||
|
||||
// multi_turn_topic — join the most recent user turns of a conversation into one
// topic string, so earlier context in the thread is re-queried.
//
// hist is read as a JSON array of entries with "role" and "content" fields;
// hist_len is its length. The array is walked from the newest entry backwards
// until three entries with role "user" have been seen or the start is reached.
// Each user turn contributes at most its first 100 characters; turns with empty
// content count towards the limit of three but add no text. Snippets are joined
// with single spaces in chronological order (oldest first) and the result is
// capped at its first 300 characters. Returns "" when there is no history or no
// usable user text.
fn multi_turn_topic(hist: String, hist_len: Int) -> String {
    if hist_len == 0 { return "" }

    let joined: String = ""
    let user_turns_seen: Int = 0
    let pos: Int = hist_len - 1
    while pos >= 0 && user_turns_seen < 3 {
        let turn: String = json_array_get(hist, pos)
        let from_user: Bool = str_eq(json_get(turn, "role"), "user")
        let text: String = json_get(turn, "content")
        let head: String = if str_len(text) > 100 { str_slice(text, 0, 100) } else { text }
        let usable: Bool = from_user && !str_eq(head, "")
        // Walking backwards, so each older snippet is prepended.
        let joined = if !usable { joined } else {
            if str_eq(joined, "") { head } else { head + " " + joined }
        }
        let user_turns_seen = if from_user { user_turns_seen + 1 } else { user_turns_seen }
        let pos = pos - 1
    }

    if str_len(joined) > 300 { return str_slice(joined, 0, 300) }
    return joined
}
|
||||
|
||||
// distill_transcript — reduce a multi-turn transcript to a compact query string,
// because embedding a full transcript produces a diffuse query.
//
// - A transcript of 250 characters or fewer is returned unchanged.
// - Otherwise only the last 500 characters (the "window") are examined.
//   * recency seed: the last 150 characters of the window.
//   * question context: the LAST "?" in the window together with up to 100
//     characters leading up to it. A "?" at window index 0 is ignored.
// - The two are combined and the result never exceeds 250 characters.
//
// When the question lies inside the recency seed, the output is one contiguous
// slice of the window running from the start of the question's lead-in to the
// end. Concatenating the two pieces there would emit the overlapping text twice.
fn distill_transcript(transcript: String) -> String {
    let tlen: Int = str_len(transcript)
    if tlen <= 250 { return transcript }

    let window_start: Int = if tlen > 500 { tlen - 500 } else { 0 }
    let window: String = str_slice(transcript, window_start, tlen)
    let wlen: Int = str_len(window)

    // Index of the last "?" in the window, or -1 when there is none.
    let q_pos: Int = -1
    let qi: Int = 0
    while qi < wlen {
        let qch: String = str_slice(window, qi, qi + 1)
        let q_pos = if str_eq(qch, "?") { qi } else { q_pos }
        let qi = qi + 1
    }

    let recency_start: Int = if wlen > 150 { wlen - 150 } else { 0 }
    let recency_seed: String = str_slice(window, recency_start, wlen)

    // No usable question: recency alone.
    if q_pos <= 0 { return recency_seed }

    let q_start: Int = if q_pos > 100 { q_pos - 100 } else { 0 }

    if q_pos >= recency_start {
        // The question mark is already inside the recency seed.
        if q_start >= recency_start { return recency_seed }
        // Lead-in starts before the seed: return the contiguous span once.
        // Its length is at most 100 + 150 = 250, so no further cap is needed.
        return str_slice(window, q_start, wlen)
    }

    // The question ends before the recency seed begins: the pieces are disjoint.
    let q_context: String = str_slice(window, q_start, q_pos + 1)
    if str_contains(recency_seed, q_context) { return recency_seed }
    let combined: String = q_context + " " + recency_seed
    if str_len(combined) > 250 {
        // Keep the end so the recency seed survives intact.
        return str_slice(combined, str_len(combined) - 250, str_len(combined))
    }
    return combined
}
|
||||
|
||||
// build_activation_seed — build the enriched memory-activation seed for the
// current message from the message itself and the conversation history.
//
// message  — the user's current message.
// hist     — conversation history, read as a JSON array of entries with "role"
//            and "content" fields.
// hist_len — number of entries in hist.
//
// Returns a seed of at most 400 characters, except that with no history the
// message is returned unchanged:
//   - No history: the message.
//   - Not a continuation (per is_genuine_continuation): message, then the
//     combined recent user turns from multi_turn_topic, capped at 400 characters
//     (the message comes first, so any truncation falls on the older context).
//   - Genuine continuation: context followed by the message, where context is
//     the most recent prior USER turn (first 150 chars) plus, when the last
//     history entry is an assistant reply, a tail-biased snippet of that reply
//     (topic_snip_from_entry).
//
// On the continuation path the current message is never sacrificed to the cap:
// the context is trimmed to whatever room the message leaves. If the message
// alone fills the budget, the (capped) message is returned without context.
fn build_activation_seed(message: String, hist: String, hist_len: Int) -> String {
    if hist_len == 0 { return message }

    if !is_genuine_continuation(message, hist_len) {
        let multi_topic: String = multi_turn_topic(hist, hist_len)
        if str_eq(multi_topic, "") { return message }
        let blended: String = message + " " + multi_topic
        if str_len(blended) > 400 { return str_slice(blended, 0, 400) }
        return blended
    }

    // Genuine continuation: walk back to the most recent USER turn and use it as
    // the topic anchor (the last entry is normally the assistant's reply).
    let prior_user_content: String = ""
    let found_prior_user: Bool = false
    let scan_idx: Int = hist_len - 1
    while scan_idx >= 0 && !found_prior_user {
        let scan_entry: String = json_array_get(hist, scan_idx)
        let is_user_turn: Bool = str_eq(json_get(scan_entry, "role"), "user")
        let prior_user_content = if is_user_turn { json_get(scan_entry, "content") } else { prior_user_content }
        // The loop only runs while nothing has been found, so this is the new state.
        let found_prior_user = is_user_turn
        let scan_idx = scan_idx - 1
    }

    // Secondary context: tail-biased snippet of the last entry, if it is an assistant reply.
    let last_entry: String = json_array_get(hist, hist_len - 1)
    let last_asst_content: String = if str_eq(json_get(last_entry, "role"), "assistant") {
        json_get(last_entry, "content")
    } else { "" }
    let asst_snip: String = if str_eq(last_asst_content, "") { "" } else {
        topic_snip_from_entry(last_asst_content)
    }

    let user_snip: String = if str_len(prior_user_content) > 150 {
        str_slice(prior_user_content, 0, 150)
    } else { prior_user_content }

    let context: String = if str_eq(user_snip, "") { asst_snip } else {
        if str_eq(asst_snip, "") { user_snip } else { user_snip + " " + asst_snip }
    }

    // Room left for context once the message and one separating space are reserved.
    let budget: Int = 400 - str_len(message) - 1
    if str_eq(context, "") || budget <= 0 {
        if str_len(message) > 400 { return str_slice(message, 0, 400) }
        return message
    }

    // Trim from the end of the context: the user anchor comes first and is kept.
    let fitted: String = if str_len(context) > budget { str_slice(context, 0, budget) } else { context }
    return fitted + " " + message
}
|
||||
|
||||
// engram_compile_multi — compile memory context from several query seeds and
// merge the results, instead of relying on a single seed per turn.
//
// primary_seed — the enriched activation seed.
// message      — the raw user message.
//
// Seeds, in the order they are queried:
//   1. engram_compile(primary_seed).
//   2. engram_compile(message), only when message differs from primary_seed.
//   3. A fixed emotion query through engram_search_json (5 results), ranked down
//      to 3 with engram_compile_ranked. Skipped when seeds 1 and 2 both came
//      back empty, or when the search returns "" or "[]".
//
// Merging: each later result is appended on a new line unless it is empty or is
// already contained verbatim in what has been merged so far. The merged text is
// capped at its first 6000 characters. Returns "" when nothing was found.
fn engram_compile_multi(primary_seed: String, message: String) -> String {
    let primary_ctx: String = engram_compile(primary_seed)

    let raw_ctx: String = if str_eq(primary_seed, message) { "" } else { engram_compile(message) }

    let nothing_found: Bool = str_eq(primary_ctx, "") && str_eq(raw_ctx, "")
    let emotion_ctx: String = if nothing_found { "" } else {
        let hits: String = engram_search_json("emotion feeling mood care distress joy hope", 5)
        if str_eq(hits, "") || str_eq(hits, "[]") { "" } else { engram_compile_ranked(hits, 3) }
    }

    // Fold the raw-message context into the primary context.
    let add_raw: Bool = !str_eq(raw_ctx, "") && !str_contains(primary_ctx, raw_ctx)
    let with_raw: String = if !add_raw { primary_ctx } else {
        if str_eq(primary_ctx, "") { raw_ctx } else { primary_ctx + "\n" + raw_ctx }
    }

    // Then fold in the emotion context.
    let add_emotion: Bool = !str_eq(emotion_ctx, "") && !str_contains(with_raw, emotion_ctx)
    let merged: String = if !add_emotion { with_raw } else {
        if str_eq(with_raw, "") { emotion_ctx } else { with_raw + "\n" + emotion_ctx }
    }

    if str_len(merged) > 6000 { return str_slice(merged, 0, 6000) }
    return merged
}
|
||||
|
||||
fn engram_compile(intent: String) -> String {
|
||||
let activate_json: String = engram_activate_json(intent, 5)
|
||||
// Fetch more search results than we'll use so ranking has a real pool to pick from.
|
||||
@@ -384,19 +618,10 @@ fn handle_chat(body: String) -> String {
|
||||
let stored_hist: String = if str_eq(state_hist, "") { conv_history_load() } else { state_hist }
|
||||
let hist_len: Int = if str_eq(stored_hist, "") { 0 } else { json_array_len(stored_hist) }
|
||||
|
||||
// Thread-aware activation: short/ambiguous messages (continuations like "go on",
|
||||
// "what else?", "yes") activate on the last reply instead of the bare message.
|
||||
// This prevents a strong off-topic memory node from hijacking the reply when the
|
||||
// user is clearly continuing an existing thread.
|
||||
let is_continuation: Bool = str_len(message) < 50 && hist_len > 0
|
||||
let last_entry: String = if is_continuation { json_array_get(stored_hist, hist_len - 1) } else { "" }
|
||||
let last_content: String = if !str_eq(last_entry, "") { json_get(last_entry, "content") } else { "" }
|
||||
let thread_snip: String = if str_len(last_content) > 150 { str_slice(last_content, 0, 150) } else { last_content }
|
||||
let activation_seed: String = if !str_eq(thread_snip, "") {
|
||||
thread_snip + " " + message
|
||||
} else {
|
||||
message
|
||||
}
|
||||
// Issues 2-3, 8-10 fix: build_activation_seed() replaces the raw 50-char threshold
|
||||
// with smart continuation detection, prior-user-topic anchoring, multi-turn context,
|
||||
// and tail-biased snipping from long assistant replies.
|
||||
let activation_seed: String = build_activation_seed(message, stored_hist, hist_len)
|
||||
|
||||
// Cross-session affective context: on session start (no history yet), check engram
|
||||
// for recent distress signals within 72h and prepend a care directive if found.
|
||||
@@ -417,7 +642,8 @@ fn handle_chat(body: String) -> String {
|
||||
} else { "" }
|
||||
} else { "" }
|
||||
|
||||
let ctx: String = engram_compile(activation_seed)
|
||||
// Issue 4 fix: engram_compile_multi adds entity + emotion fan-out seeds
|
||||
let ctx: String = engram_compile_multi(activation_seed, message)
|
||||
let system: String = affective_prefix + build_system_prompt(ctx)
|
||||
|
||||
// First message of the session: proactively load user profile and active work context.
|
||||
@@ -1000,13 +1226,15 @@ fn handle_chat_agentic(body: String) -> String {
|
||||
let hist_key: String = if str_eq(req_session, "") { "conv_history" } else { "session_hist_" + req_session }
|
||||
let agentic_hist: String = state_get(hist_key)
|
||||
let agentic_hist_len: Int = if str_eq(agentic_hist, "") { 0 } else { json_array_len(agentic_hist) }
|
||||
let ag_is_cont: Bool = str_len(message) < 50 && agentic_hist_len > 0
|
||||
let ag_last_entry: String = if ag_is_cont { json_array_get(agentic_hist, agentic_hist_len - 1) } else { "" }
|
||||
let ag_last_content: String = if !str_eq(ag_last_entry, "") { json_get(ag_last_entry, "content") } else { "" }
|
||||
let ag_thread_snip: String = if str_len(ag_last_content) > 150 { str_slice(ag_last_content, 0, 150) } else { ag_last_content }
|
||||
let ag_seed: String = if !str_eq(ag_thread_snip, "") { ag_thread_snip + " " + message } else { message }
|
||||
|
||||
let ctx: String = engram_compile(ag_seed)
|
||||
// Issues 2-5, 8-10 fix: build_activation_seed for smart continuation/multi-turn.
|
||||
// Issue 5 fix: workspace_root appended so agent activation is workspace-aware.
|
||||
let ag_seed_base: String = build_activation_seed(message, agentic_hist, agentic_hist_len)
|
||||
let ag_workspace_root: String = agent_workspace_root()
|
||||
let ag_seed: String = if !str_eq(ag_workspace_root, "") {
|
||||
ag_seed_base + " workspace:" + ag_workspace_root
|
||||
} else { ag_seed_base }
|
||||
// Issue 4 fix: multi-seed fan-out (entity + emotion)
|
||||
let ctx: String = engram_compile_multi(ag_seed, message)
|
||||
let identity: String = state_get("soul_identity")
|
||||
let system: String = identity + " You have access to tools: read files, write files, browse the web, search your memory, run commands. Use them when they add genuine value. Be direct.\n\n" + ctx
|
||||
|
||||
@@ -1396,7 +1624,8 @@ fn handle_dharma_room_turn(body: String) -> String {
|
||||
}
|
||||
|
||||
// The soul's own memories, activated by what it's reading — not injected.
|
||||
let engram_ctx: String = engram_compile(transcript)
|
||||
// Issue 6 fix: distill_transcript() reduces diffuse embedding noise
|
||||
let engram_ctx: String = engram_compile(distill_transcript(transcript))
|
||||
let system_prompt: String = if str_eq(engram_ctx, "") {
|
||||
identity
|
||||
} else {
|
||||
@@ -1448,7 +1677,8 @@ fn handle_dharma_room_turn_agentic(body: String) -> String {
|
||||
return "{\"error\":\"transcript is required\",\"response\":\"\",\"cgi_id\":\"" + cgi_id + "\"}"
|
||||
}
|
||||
|
||||
let ctx: String = engram_compile(transcript)
|
||||
// Issue 6 fix: distill_transcript() reduces diffuse embedding noise
|
||||
let ctx: String = engram_compile(distill_transcript(transcript))
|
||||
let system: String = identity + " You have access to tools: read files, write files, browse the web, search your memory, run commands. Use them when they add genuine value. Be direct and stay in character.\n\n" + ctx
|
||||
|
||||
let api_key: String = agentic_api_key()
|
||||
|
||||
+23
-14
@@ -22313,7 +22313,23 @@ fn handle_chat(body: String) -> String {
|
||||
// In demo mode: use tighter engram budget and add response length constraint.
|
||||
let is_demo: Bool = !str_eq(state_get("soul_identity_prefix"), "")
|
||||
|
||||
let ctx: String = if is_demo { engram_compile_demo(message) } else { engram_compile(message) }
|
||||
// Issue 7 fix: load history BEFORE building the activation seed so we can
|
||||
// apply the continuation guard that chat.el uses. The nlg code path previously
|
||||
// called engram_compile(message) with no thread enrichment at all.
|
||||
let stored_hist: String = state_get("conv_history")
|
||||
let hist_len: Int = if str_eq(stored_hist, "") { 0 } else { json_array_len(stored_hist) }
|
||||
let history_section: String = if hist_len > 0 {
|
||||
"\n\n[RECENT CONVERSATION — last " + int_to_str(hist_len) + " turns]\n" + stored_hist
|
||||
} else {
|
||||
""
|
||||
}
|
||||
|
||||
// Issue 7 fix: build enriched seed using build_activation_seed() — adds
|
||||
// smart continuation detection, prior-user-topic anchoring, multi-turn context,
|
||||
// and tail-biased snipping (Issues 2-3, 8-10). For demo mode, still use
|
||||
// engram_compile_demo but with the enriched seed.
|
||||
let nlg_seed: String = build_activation_seed(message, stored_hist, hist_len)
|
||||
let ctx: String = if is_demo { engram_compile_demo(nlg_seed) } else { engram_compile(nlg_seed) }
|
||||
let node_count_str: String = count_context_nodes(ctx)
|
||||
|
||||
let interlocutor: String = json_get(body, "interlocutor")
|
||||
@@ -22333,18 +22349,6 @@ fn handle_chat(body: String) -> String {
|
||||
let presence_line = "\n\n[ambient: I see " + interlocutor_name + rel_suffix + " on the camera right now. Address them naturally. Do not describe what they look like or narrate the picture unless asked.]"
|
||||
}
|
||||
|
||||
// Conversation history — soul-owned, persisted in process state across turns.
|
||||
// Format stored in state: JSON array of {"role":"user"|"assistant","content":"..."} objects.
|
||||
// We load it, inject into the system prompt, then append this exchange after the reply.
|
||||
// Keep last 20 entries (10 turns) — truncate from the front when over limit.
|
||||
let stored_hist: String = state_get("conv_history")
|
||||
let hist_len: Int = if str_eq(stored_hist, "") { 0 } else { json_array_len(stored_hist) }
|
||||
let history_section: String = if hist_len > 0 {
|
||||
"\n\n[RECENT CONVERSATION — last " + int_to_str(hist_len) + " turns]\n" + stored_hist
|
||||
} else {
|
||||
""
|
||||
}
|
||||
|
||||
// Demo constraint: keep responses concise — under 150 words. No markdown headers.
|
||||
// This keeps inference cheap and responses readable in the chat widget.
|
||||
let demo_constraint: String = if is_demo {
|
||||
@@ -22505,7 +22509,12 @@ fn handle_chat_agentic(body: String) -> String {
|
||||
req_model
|
||||
}
|
||||
|
||||
let ctx: String = engram_compile(message)
|
||||
// Issue 7 fix: load history and use build_activation_seed() for the agentic
|
||||
// nlg path — no continuation guard existed here before (Issues 2-3, 8-10).
|
||||
let nlg_ag_hist: String = state_get("conv_history")
|
||||
let nlg_ag_hist_len: Int = if str_eq(nlg_ag_hist, "") { 0 } else { json_array_len(nlg_ag_hist) }
|
||||
let nlg_ag_seed: String = build_activation_seed(message, nlg_ag_hist, nlg_ag_hist_len)
|
||||
let ctx: String = engram_compile(nlg_ag_seed)
|
||||
|
||||
let system: String = "You are Neuron — a thinking process running inside the Neuron daemon on Will Anderson's machine. "
|
||||
+ "You are speaking with Will, your principal. "
|
||||
|
||||
Reference in New Issue
Block a user