Compare commits

..

2 Commits

Author SHA1 Message Date
Tim Lingo c2afcbddf5 fix(engram): allow SessionSummary node_type in validation allowlist
El SDK CI - dev / build-and-test (pull_request) Successful in 3m47s
handle_api_consolidate writes a "SessionSummary" node, but engram_valid_node_type
omitted it — so once this validation ships, every consolidate() would be silently
REJECTED at the engram boundary. Add SessionSummary to the allowlist.

Found in Will's PR review of neuron #1 / el #52.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-10 06:26:25 -05:00
Tim Lingo dfe4e83ed1 Fix engram_node_full wrapper field corruption + add node_type/tier validation
El SDK Release / build-and-release (pull_request) Failing after 9s
The wrapper signature was stale and didn't match the C primitive
__engram_node_full(content, node_type, label, salience, importance, confidence, tier, tags).
Because el_val_t is an untyped machine word, the compiler coerced caller args to the
wrong declared param types and forwarded them BY POSITION — so tier received an int,
importance/confidence received strings, label received a float, etc. (~100 corrupt nodes).

- Correct the wrapper to match the C contract 1:1 (no coercion, no reorder).
- Add engram_valid_node_type / engram_valid_tier allowlists; engram_node and
  engram_node_full now reject invalid values with __println + return "" (fail loud,
  no silent malformed write).

See neuron repo: HANDOFF-engram-write-corruption.md for the full write-up + deploy runbook.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-08 16:13:43 -05:00
2 changed files with 57 additions and 46 deletions
+14 -43
View File
@@ -3135,49 +3135,23 @@ static void jb_puts(JsonBuf* b, const char* s) {
static void jb_emit_escaped(JsonBuf* b, const char* s) {
jb_putc(b, '"');
const unsigned char* p = (const unsigned char*)s;
while (*p) {
unsigned char c = *p;
for (; *s; s++) {
unsigned char c = (unsigned char)*s;
switch (c) {
case '"': jb_puts(b, "\\\""); p++; break;
case '\\': jb_puts(b, "\\\\"); p++; break;
case '\b': jb_puts(b, "\\b"); p++; break;
case '\f': jb_puts(b, "\\f"); p++; break;
case '\n': jb_puts(b, "\\n"); p++; break;
case '\r': jb_puts(b, "\\r"); p++; break;
case '\t': jb_puts(b, "\\t"); p++; break;
case '"': jb_puts(b, "\\\""); break;
case '\\': jb_puts(b, "\\\\"); break;
case '\b': jb_puts(b, "\\b"); break;
case '\f': jb_puts(b, "\\f"); break;
case '\n': jb_puts(b, "\\n"); break;
case '\r': jb_puts(b, "\\r"); break;
case '\t': jb_puts(b, "\\t"); break;
default:
if (c < 0x20) {
char tmp[8];
snprintf(tmp, sizeof(tmp), "\\u%04x", c);
jb_puts(b, tmp);
p++;
} else if (c < 0x80) {
jb_putc(b, (char)c);
p++;
} else {
/* Multi-byte UTF-8: validate sequence, pass through if valid,
* escape as \u00xx if the start byte is invalid/orphaned. */
int seq_len = 0;
if ((c & 0xE0) == 0xC0) seq_len = 2;
else if ((c & 0xF0) == 0xE0) seq_len = 3;
else if ((c & 0xF8) == 0xF0) seq_len = 4;
if (seq_len >= 2) {
int valid = 1;
for (int i = 1; i < seq_len; i++) {
if ((p[i] & 0xC0) != 0x80) { valid = 0; break; }
}
if (valid) {
for (int i = 0; i < seq_len; i++) jb_putc(b, (char)p[i]);
p += seq_len;
break;
}
}
/* Invalid start byte or truncated sequence — escape it */
char tmp[8];
snprintf(tmp, sizeof(tmp), "\\u%04x", c);
jb_puts(b, tmp);
p++;
jb_putc(b, (char)c);
}
break;
}
@@ -8473,7 +8447,7 @@ static el_val_t llm_provider_request(const char* url, const char* key,
}
}
static el_val_t llm_chain_call(const char* model_pref, const char* system_str, const char* user_str) {
static el_val_t llm_chain_call(const char* system_str, const char* user_str) {
char url_key[64], key_key[64], fmt_key[64], model_key[64];
for (int i = 0; i < LLM_MAX_PROVIDERS; i++) {
snprintf(url_key, sizeof(url_key), "NEURON_LLM_%d_URL", i);
@@ -8486,7 +8460,6 @@ static el_val_t llm_chain_call(const char* model_pref, const char* system_str, c
const char* fmt_s = getenv(fmt_key);
int fmt = (fmt_s && strcmp(fmt_s, "anthropic") == 0) ? 1 : 0;
const char* model = getenv(model_key);
if (!model || !*model) model = model_pref; /* fall back to the caller-requested model */
fprintf(stderr, "[llm] trying provider %d (%s)\n", i, url);
el_val_t result = llm_provider_request(url, key, fmt, model, system_str, user_str);
const char* t = EL_CSTR(result);
@@ -8497,7 +8470,7 @@ static el_val_t llm_chain_call(const char* model_pref, const char* system_str, c
const char* api_key = getenv("ANTHROPIC_API_KEY");
if (!api_key || !*api_key) return http_error_json("no LLM providers configured");
fprintf(stderr, "[llm] using legacy ANTHROPIC_API_KEY fallback\n");
return llm_provider_request(LLM_API_URL, api_key, 1, model_pref, system_str, user_str);
return llm_provider_request(LLM_API_URL, api_key, 1, NULL, system_str, user_str);
}
/* Legacy llm_request — kept for backward compat with agentic loop internals */
@@ -8561,16 +8534,14 @@ static el_val_t llm_extract_text(el_val_t resp_val) {
}
el_val_t llm_call(el_val_t model, el_val_t prompt) {
const char* m = EL_CSTR(model);
const char* u = EL_CSTR(prompt); if (!u) u = "";
return llm_chain_call(m, NULL, u);
return llm_chain_call(NULL, u);
}
el_val_t llm_call_system(el_val_t model, el_val_t system_prompt, el_val_t user_prompt) {
const char* m = EL_CSTR(model);
const char* s = EL_CSTR(system_prompt); if (!s) s = "";
const char* u = EL_CSTR(user_prompt); if (!u) u = "";
return llm_chain_call(m, s, u);
return llm_chain_call(s, u);
}
/* ── Tool registry for llm_call_agentic ─────────────────────────────────── */
+43 -3
View File
@@ -6,15 +6,55 @@
//
// Dependencies: runtime/string.el, runtime/json.el
// --- Validation (defense in depth) ---
// el_val_t is an untyped machine word, so a wrong TYPE can't be caught here but a
// wrong VALUE can (a tier in the node_type slot, an empty/garbage string, an int, a
// path, a model name, a cgi id). Reject loudly instead of silently writing junk.
// Reports whether `t` is one of the node type names accepted for engram writes.
//
// The result is the logical OR of str_eq(t, <name>) over the fixed list of
// names in the body; a value that matches none of them is not accepted.
// NOTE(review): str_eq is presumably exact, case-sensitive string equality
// from runtime/string.el — confirm.
//
// "SessionSummary" must stay in this list: handle_api_consolidate writes nodes
// of that type, and engram_node / engram_node_full reject anything this
// function does not accept.
fn engram_valid_node_type(t: String) -> Bool {
return str_eq(t, "Memory") || str_eq(t, "Knowledge") || str_eq(t, "Belief")
|| str_eq(t, "Project") || str_eq(t, "Tag") || str_eq(t, "BacklogItem")
|| str_eq(t, "Artifact") || str_eq(t, "Conversation") || str_eq(t, "ExecutionContext")
|| str_eq(t, "InternalStateEvent") || str_eq(t, "Self") || str_eq(t, "Entity")
|| str_eq(t, "Process") || str_eq(t, "ConfigEntry") || str_eq(t, "Concept") || str_eq(t, "Imprint")
|| str_eq(t, "SessionSummary")
}
// Reports whether `t` is one of the tier names accepted for engram writes.
//
// The result is the logical OR of str_eq(t, <name>) over the seven names in
// the body: Semantic, Episodic, Working, Procedural, Canonical, Note, Lesson.
// A value that matches none of them is not accepted.
// NOTE(review): str_eq is presumably exact, case-sensitive string equality
// from runtime/string.el — confirm.
fn engram_valid_tier(t: String) -> Bool {
return str_eq(t, "Semantic") || str_eq(t, "Episodic") || str_eq(t, "Working")
|| str_eq(t, "Procedural") || str_eq(t, "Canonical") || str_eq(t, "Note") || str_eq(t, "Lesson")
}
// --- Node creation ---
// Creates an engram node after checking node_type against the allowlist.
//
// Parameters:
//   content   - forwarded unchanged to __engram_node.
//   node_type - must be accepted by engram_valid_node_type.
//   salience  - forwarded unchanged to __engram_node; not validated here.
//
// Returns: whatever __engram_node(content, node_type, salience) returns, or
// "" when node_type is rejected. On rejection a "[engram] REJECTED ..." line
// naming the offending node_type is printed with __println and the primitive
// is never called, so callers must treat "" as "nothing was written".
fn engram_node(content: String, node_type: String, salience: Float) -> String {
if !engram_valid_node_type(node_type) {
__println("[engram] REJECTED node write — invalid node_type '" + node_type + "'")
return ""
}
return __engram_node(content, node_type, salience)
}
fn engram_node_full(content: String, nt: String, sal: Float, imp: Float,
source: String, lang: String, ts: Int, tags: String) -> String {
return __engram_node_full(content, nt, sal, imp, source, lang, ts, tags)
// Signature MUST match the C primitive __engram_node_full exactly (el_seed.h):
// (content, node_type, label, salience, importance, confidence, tier, tags)
// The previous wrapper declared a stale 8-arg schema with wrong names AND types
// (sal:Float at the label slot, ts:Int at the tier slot). Because el_val_t is an
// untyped machine word, the EL compiler coerced caller args to those wrong param
// types and then forwarded them BY POSITION into the C function, so tier received
// an int, importance/confidence received strings, label received a float, etc.
// That is the field-corruption bug. Match the contract 1:1: no coercion, no reorder.
//
// Creates an engram node with the full field set, after validating the two
// enumerated fields.
//
// Parameters are forwarded to __engram_node_full in exactly the order they
// are declared: content, node_type, label, salience, importance, confidence,
// tier, tags.
//
// Validation order: node_type is checked first with engram_valid_node_type,
// then tier with engram_valid_tier. The first failing check prints a
// "[engram] REJECTED ..." line via __println (including label, and node_type
// for the tier case, to help locate the caller) and returns "" without
// calling the primitive. salience, importance, confidence, label, content
// and tags are not validated here.
//
// Returns: the result of __engram_node_full, or "" when a check fails.
fn engram_node_full(content: String, node_type: String, label: String,
salience: Float, importance: Float, confidence: Float,
tier: String, tags: String) -> String {
if !engram_valid_node_type(node_type) {
__println("[engram] REJECTED node write — invalid node_type '" + node_type + "' (label=" + label + ")")
return ""
}
if !engram_valid_tier(tier) {
__println("[engram] REJECTED node write — invalid tier '" + tier + "' (node_type=" + node_type + ", label=" + label + ")")
return ""
}
return __engram_node_full(content, node_type, label, salience, importance, confidence, tier, tags)
}
// --- Node retrieval ---