Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| db6e51c346 |
@@ -0,0 +1,52 @@
|
||||
# Prevention fixes for review (engram corruption at the source) — for Will
|
||||
|
||||
**Context:** a scan of Tim's engram found 21% of nodes corrupt. The root cause is **boot-time writes that
re-insert instead of update**, plus the lack of UTF-8 validation. New users start clean but would rot the same way.
Full spec: `docs/research-archive/p0-prototypes/CORRUPTION-PREVENTION-SPEC.md`. These are **soul core**, so
this is a proposal for your review/build/test — nothing has been applied. Prepared by Neuron-in-the-CLI (untested El;
needs your engram-internals knowledge to finalize).
|
||||
|
||||
## The one question that unblocks everything
|
||||
**Does `engram_node_full(content, type, label, …)` upsert by label, or always insert a new node?**
|
||||
- `conv_history_persist` (chat.el:786) reuses label `"conv:history"` and is described as "upsert by label",
  and does NOT show up as heavy duplicates.
- `mem_boot_count_inc` (memory.el:127) reuses label `"soul:boot_count"` but **accumulated ~120 copies**.
- Both call `engram_node_full` with a fixed label + changing content. If it upserts by label, boot_count
  shouldn't accumulate; since it does, either it always inserts, or conv:history avoids duplicates another way.
- **Your answer decides the fix:** (a) if there's an upsert/update-by-label primitive, the fixes are
  one-line swaps; (b) if not, we add a `find-by-label → update-or-insert` helper and use it everywhere.
|
||||
|
||||
`engram_node_full` / `engram_get_node_by_label` / any update primitive live in the engram repo — I couldn't
inspect their semantics from the soul repo.
|
||||
|
||||
## FIX 1 — Idempotent boot seeding (biggest cause, ~75% of corruption)
|
||||
- **`mem_boot_count_inc` (memory.el:127-140)** — the code comment admits it: *"Each boot creates a new
  'soul:boot_count:N' node. Old ones accumulate as history."* → change it to **update the single
  `soul:boot_count` node** (find-by-label → set content to new count) instead of creating a new one.
- **Identity/safety belief seeding** (the `safety:*-boundary`, `safety:anti-hallucination` beliefs that hit
  ~81 copies each) — wherever these are seeded on boot, make them **upsert by label** so re-seeding updates
  the one node instead of adding a copy. (Reuse the `chat.el` "upsert by label" approach.)
- **Test:** boot the clean profile (:7798) 5×; each `safety:*-boundary` belief and `soul:boot_count` exists
  exactly **once**; the counter shows the latest value.
|
||||
|
||||
## FIX 2 — UTF-8 validation/sanitization on every engram write
|
||||
- No UTF-8 validation was found on the write path; invalid bytes got persisted (garbled nodes).
- **Fix:** validate/normalize to valid UTF-8 before `engram_node_full` persists (reject or sanitize).
- **Test:** write a node with invalid bytes → stored clean (or rejected); snapshot parses with zero
  replacement characters.
|
||||
|
||||
## FIX 3 — Confirm read-back-verify covers ALL write paths
|
||||
- Already present: `api_persisted` ("read-back-after-write guard", neuron-api.el:90) + safety.el:410. Good.
- **Review the deliberate exception** at neuron-api.el:198 ("NOT read-back-verify here … can return a STALE
  hit for a just-written node") and close it safely so every write path verifies.
- **Test:** save → read back → matches; force a failed write → returns `api_not_persisted`, not false success.
|
||||
|
||||
## FIX 4 — Cap/prune time-series events (housekeeping, NOT corruption)
|
||||
- The ~120 `session-start` InternalStateEvent nodes (soul.el:294) are **legitimate per-boot history** — do
  **not** dedup them. But keep them bounded (keep the last N / summarize older ones) so the engram doesn't grow forever.
- **Test:** after many boots, the event count stays bounded and older history is still available in summarized form.
|
||||
|
||||
## Sequence
|
||||
Confirm the upsert question → implement Fix 1 (biggest win) → Fix 2 → Fix 3 → Fix 4 → build + test on the
clean profile (:7798) before prod. Legacy cleanup of existing corrupt data is a **separate, secondary**
safety net (and its dedup must be time-series-aware and merge edges, not blind-delete).
|
||||
@@ -1202,86 +1202,6 @@ fn agentic_api_key() -> String {
|
||||
return env("NEURON_LLM_0_KEY")
|
||||
}
|
||||
|
||||
// ── OpenAI-compatible providers (Ollama / OpenAI / Grok / Gemini) ──────────────────────────────
|
||||
// The brain speaks Anthropic's Messages format by default. When the active provider uses the
|
||||
// OpenAI-compatible wire format (NEURON_LLM_0_FORMAT=openai) with a configured base URL
|
||||
// (NEURON_LLM_0_URL, e.g. http://localhost:11434/v1 for local Ollama), basic chat turns are served
|
||||
// here instead of the Anthropic agentic loop.
|
||||
// v1 SCOPE: plain chat completion only — NO tools / agentic loop yet (that is a follow-up port).
|
||||
// This block is ADDITIVE: the Anthropic path is untouched and stays the default.
|
||||
|
||||
// Base URL of the active provider's OpenAI-compatible endpoint, read from the
// NEURON_LLM_0_URL environment variable. The value is returned exactly as env() yields it.
fn llm_base_url() -> String {
    let base: String = env("NEURON_LLM_0_URL")
    return base
}
|
||||
|
||||
// Wire format of the active provider, read from NEURON_LLM_0_FORMAT.
// An empty value falls back to "anthropic"; any other value is returned unchanged.
fn llm_wire_format() -> String {
    let configured: String = env("NEURON_LLM_0_FORMAT")
    let format: String = if str_eq(configured, "") { "anthropic" } else { configured }
    return format
}
|
||||
|
||||
// Escape a decoded string so it can be embedded back into a JSON string literal.
// Handles backslash, double quote, newline, carriage return and tab. The backslash pass runs
// first so that the backslashes introduced by the later passes are not escaped a second time.
// NOTE(review): the remaining control characters below U+0020 (backspace, form feed, ...) are
// still passed through raw; JSON requires them to be escaped as well — extend if they can occur.
fn json_escape(s: String) -> String {
    let a: String = str_replace(s, "\\", "\\\\")
    let b: String = str_replace(a, "\"", "\\\"")
    let c: String = str_replace(b, "\n", "\\n")
    let d: String = str_replace(c, "\r", "\\r")
    // A raw tab inside a JSON string literal is invalid; emit the two-character \t escape instead.
    let e: String = str_replace(d, "\t", "\\t")
    return e
}
|
||||
|
||||
// Basic (non-agentic) chat completion against an OpenAI-compatible endpoint.
//
//   model         — model name; JSON-escaped here before it is embedded in the request body.
//   base_url      — provider base URL; "/chat/completions" is appended to it.
//   api_key       — sent as a Bearer token when non-empty (Ollama needs no key).
//   safe_sys      — system prompt, already JSON-escaped by the caller.
//   messages_json — the same JSON array the Anthropic path builds
//                   (e.g. [{"role":"user","content":"..."}]).
//
// Returns the soul's standard {"reply":"...","tools_used":[]} on success,
// {"error":"llm unavailable","reply":""} when the response looks like an error, and
// {"error":"empty response","reply":""} when the response carries no choices.
fn openai_chat_complete(model: String, base_url: String, api_key: String, safe_sys: String, messages_json: String) -> String {
    // Prepend the system prompt as an OpenAI "system" message, then the existing turn array.
    // The outer brackets of the turn array are cut off so its elements can be spliced in after
    // the system message (presumably str_slice takes start/end offsets — confirm).
    let inner: String = if json_array_len(messages_json) > 0 {
        str_slice(messages_json, 1, str_len(messages_json) - 1)
    } else {
        ""
    }
    let msgs: String = if str_eq(inner, "") {
        "[{\"role\":\"system\",\"content\":\"" + safe_sys + "\"}]"
    } else {
        "[{\"role\":\"system\",\"content\":\"" + safe_sys + "\"}," + inner + "]"
    }
    // The model name is escaped so a quote or backslash in it cannot break out of the JSON string.
    let req_body: String = "{\"model\":\"" + json_escape(model) + "\""
        + ",\"max_tokens\":4096"
        + ",\"messages\":" + msgs
        + "}"

    let h: Map = {}
    map_set(h, "content-type", "application/json")
    // Ollama needs no key; OpenAI / Grok / Gemini use a Bearer token.
    if !str_eq(api_key, "") {
        map_set(h, "Authorization", "Bearer " + api_key)
    }

    let url: String = base_url + "/chat/completions"
    let raw_resp: String = http_post_with_headers(url, req_body, h)

    // Any body that starts with {"error" or contains an "error": key is treated as a failure.
    let is_error: Bool = str_starts_with(raw_resp, "{\"error\"") || str_contains(raw_resp, "\"error\":")
    if is_error {
        return "{\"error\":\"llm unavailable\",\"reply\":\"\"}"
    }

    // Parse OpenAI response shape: choices[0].message.content
    let choices: String = json_get_raw(raw_resp, "choices")
    // A missing "choices" field is normalized to an empty array so the length check below applies.
    let eff_choices: String = if str_eq(choices, "") {
        "[]"
    } else {
        choices
    }
    if json_array_len(eff_choices) < 1 {
        return "{\"error\":\"empty response\",\"reply\":\"\"}"
    }
    let first: String = json_array_get(eff_choices, 0)
    let message: String = json_get_raw(first, "message")
    let content: String = json_get(message, "content")
    // The content is re-escaped before it is embedded in the reply envelope.
    return "{\"reply\":\"" + json_escape(content) + "\",\"tools_used\":[]}"
}
|
||||
|
||||
fn agentic_tools_literal() -> String {
|
||||
return "[" +
|
||||
"{\"name\":\"read_file\",\"description\":\"Read contents of a file from disk.\",\"input_schema\":{\"type\":\"object\",\"properties\":{\"path\":{\"type\":\"string\",\"description\":\"Absolute file path\"}},\"required\":[\"path\"]}}," +
|
||||
@@ -1784,14 +1704,7 @@ fn handle_chat_agentic(body: String) -> String {
|
||||
|
||||
// Use caller-supplied session_id if provided, otherwise generate a bridge id.
|
||||
let session_id: String = if str_eq(req_session, "") { next_bridge_id() } else { req_session }
|
||||
// Provider fork: OpenAI-compatible providers (Ollama/OpenAI/Grok/Gemini) take the plain-completion
|
||||
// path (v1, no tools); everything else stays on the Anthropic agentic loop (the default).
|
||||
let use_openai: Bool = !str_eq(llm_base_url(), "") && str_eq(llm_wire_format(), "openai")
|
||||
let result: String = if use_openai {
|
||||
openai_chat_complete(model, llm_base_url(), agentic_api_key(), safe_sys, messages)
|
||||
} else {
|
||||
agentic_loop(session_id, model, safe_sys, tools_json, messages, h, "")
|
||||
}
|
||||
let result: String = agentic_loop(session_id, model, safe_sys, tools_json, messages, h, "")
|
||||
|
||||
// Persist the exchange to session/global history for thread continuity on next turn.
|
||||
// Only save when the loop completed (reply present), not when tool_pending.
|
||||
|
||||
+2
-27
@@ -267,27 +267,6 @@ fn recall_or_list(query: String, limit: Int) -> String {
|
||||
return http_post_json(neuron_url() + "/recall", body)
|
||||
}
|
||||
|
||||
// Create a real typed node via /api/neuron/node/create (handle_api_node_create) so it is a proper
// BacklogItem/Artifact/etc. — listable by type via /api/neuron/list/<type> — instead of a generic
// memory blob. Maps title->label, content/description->content, project/priority->tags.
//
//   args      — JSON object with the tool arguments (content/title, optional project, priority).
//   node_type — node type to create; also used as the first tag and as the label when no title is given.
//   tier      — tier value passed through to the create request.
//
// Returns an MCP text error when no content can be picked from args; otherwise the create
// endpoint's response wrapped by mcp_json_result.
fn create_node_typed(args: String, node_type: String, tier: String) -> String {
    let content: String = pick_content(args)
    if str_eq(content, "") {
        return mcp_text_result("error: content/title is required for " + node_type)
    }
    let title: String = json_get_string(args, "title")
    let label: String = if str_eq(title, "") { node_type } else { title }
    let project: String = json_get_string(args, "project")
    let priority: String = json_get_string(args, "priority")
    // project/priority come from the caller's args, so they are escaped like content and label;
    // a quote or backslash in either would otherwise corrupt the tags array.
    let proj_tag: String = if str_eq(project, "") { "" } else { ",\"project:" + json_escape(project) + "\"" }
    let prio_tag: String = if str_eq(priority, "") { "" } else { ",\"priority:" + json_escape(priority) + "\"" }
    let tags: String = "[\"" + node_type + "\"" + proj_tag + prio_tag + "]"
    let body: String = "{\"node_type\":\"" + node_type + "\",\"content\":\"" + json_escape(content)
        + "\",\"label\":\"" + json_escape(label) + "\",\"tier\":\"" + tier + "\",\"tags\":" + tags + "}"
    let resp: String = http_post_json(neuron_url() + "/node/create", body)
    return mcp_json_result(resp)
}
|
||||
|
||||
fn search_with_query(args: String, default_limit: Int) -> String {
|
||||
let query: String = json_get_string(args, "query")
|
||||
if str_eq(query, "") { let query = pick_content(args) }
|
||||
@@ -652,12 +631,8 @@ fn dispatch_tool_call(tool_name: String, args: String) -> String {
|
||||
}
|
||||
|
||||
// ── Backlog + work ──────────────────────────────────────────────────────
|
||||
// planWork: create a REAL typed BacklogItem via /api/neuron/node/create (the old path fell through
|
||||
// create_typed_node to a generic /memory write, dropping title/project/priority and never making a
|
||||
// BacklogItem). reviewBacklog: LIST BacklogItem nodes (was a lexical /recall that never filtered by
|
||||
// type). Both depend on the /api/neuron/list/<type> slice fix (neuron PR #58) to round-trip.
|
||||
if str_eq(tool_name, "planWork") { return create_node_typed(args, "BacklogItem", "Working") }
|
||||
if str_eq(tool_name, "reviewBacklog") { return list_typed("BacklogItem", 50, args) }
|
||||
if str_eq(tool_name, "planWork") { return create_typed_node(args, "BacklogItem", "0.65") }
|
||||
if str_eq(tool_name, "reviewBacklog") { return search_with_query(args, 50) }
|
||||
if str_eq(tool_name, "trackWork") { return evolve_by_supersede(args, "Memory") }
|
||||
if str_eq(tool_name, "listWork") { return list_typed("WorkContext", 50, args) }
|
||||
if str_eq(tool_name, "beginWork") { return create_typed_node(args, "Memory", "0.70") }
|
||||
|
||||
Reference in New Issue
Block a user