Compare commits
24 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 89e45ed689 | |||
| 8d4c5f34bf | |||
| 35c189759c | |||
| 5c94b8680d | |||
| cebf3ded62 | |||
| b83ecf52f9 | |||
| 15ea584671 | |||
| c2afcbddf5 | |||
| dbf2c659d9 | |||
| 2b8062c55f | |||
| dfe4e83ed1 | |||
| a390ee494e | |||
| 8212e12e57 | |||
| 2ed6b26dde | |||
| d8e9fd12f4 | |||
| 8fa9c4ba20 | |||
| 9c7bde47dc | |||
| c0553459e1 | |||
| fd208583fe | |||
| 3e29fc43ab | |||
| 979a5677d5 | |||
| 17b1aa0736 | |||
| f0c731d2db | |||
| e7e0f7d3e5 |
@@ -1475,10 +1475,13 @@ static void http_send_response(int fd, const char* body) {
|
||||
}
|
||||
|
||||
const char* eff_body = is_envelope ? env_body : body;
|
||||
/* Use the real byte count from fs_read if available (handles binary files
|
||||
* with embedded null bytes — PNG, WOFF2, etc.). Fall back to strlen for
|
||||
* normal text/JSON responses where _tl_fs_read_len is 0. */
|
||||
size_t blen = (_tl_fs_read_len > 0) ? _tl_fs_read_len : strlen(eff_body);
|
||||
/* Use max(strlen, fs_read_len). fs_read_len is the real byte count for binary
|
||||
* files (strlen stops at embedded NULs — PNG, WOFF2). strlen is correct AND larger
|
||||
* when a handler WRAPS fs_read output in a longer text/JSON response (e.g.
|
||||
* /api/safety-contact returns {"configured":...,"contact": <file>}); using
|
||||
* fs_read_len alone truncated those responses to the file's length. */
|
||||
size_t _blen_s = strlen(eff_body);
|
||||
size_t blen = (_tl_fs_read_len > _blen_s) ? _tl_fs_read_len : _blen_s;
|
||||
_tl_fs_read_len = 0; /* consume — one-shot per response */
|
||||
int head_only = _tl_http_head_only;
|
||||
|
||||
@@ -1552,7 +1555,8 @@ static void* http_worker(void* arg) {
|
||||
/* Copy response out BEFORE arena teardown.
|
||||
* For binary files, _tl_fs_read_len holds the real byte count —
|
||||
* use memcpy instead of strdup so null bytes are preserved. */
|
||||
size_t rlen = _tl_fs_read_len > 0 ? _tl_fs_read_len : (rs ? strlen(rs) : 0);
|
||||
size_t _rlen_s = rs ? strlen(rs) : 0;
|
||||
size_t rlen = (_tl_fs_read_len > _rlen_s) ? _tl_fs_read_len : _rlen_s;
|
||||
response = malloc(rlen + 1);
|
||||
if (response && rs) { memcpy(response, rs, rlen); response[rlen] = '\0'; }
|
||||
else if (response) { response[0] = '\0'; }
|
||||
@@ -1799,7 +1803,8 @@ static void* http_worker_v2(void* arg) {
|
||||
el_val_t hmap = http_build_headers_map(hdr_block ? hdr_block : "");
|
||||
el_val_t r = h(EL_STR(dispatch_method), EL_STR(path), hmap, EL_STR(body));
|
||||
const char* rs = EL_CSTR(r);
|
||||
size_t rlen = _tl_fs_read_len > 0 ? _tl_fs_read_len : (rs ? strlen(rs) : 0);
|
||||
size_t _rlen_s = rs ? strlen(rs) : 0;
|
||||
size_t rlen = (_tl_fs_read_len > _rlen_s) ? _tl_fs_read_len : _rlen_s;
|
||||
response = malloc(rlen + 1);
|
||||
if (response && rs) { memcpy(response, rs, rlen); response[rlen] = '\0'; }
|
||||
else if (response) { response[0] = '\0'; }
|
||||
@@ -3173,23 +3178,49 @@ static void jb_puts(JsonBuf* b, const char* s) {
|
||||
|
||||
static void jb_emit_escaped(JsonBuf* b, const char* s) {
|
||||
jb_putc(b, '"');
|
||||
for (; *s; s++) {
|
||||
unsigned char c = (unsigned char)*s;
|
||||
const unsigned char* p = (const unsigned char*)s;
|
||||
while (*p) {
|
||||
unsigned char c = *p;
|
||||
switch (c) {
|
||||
case '"': jb_puts(b, "\\\""); break;
|
||||
case '\\': jb_puts(b, "\\\\"); break;
|
||||
case '\b': jb_puts(b, "\\b"); break;
|
||||
case '\f': jb_puts(b, "\\f"); break;
|
||||
case '\n': jb_puts(b, "\\n"); break;
|
||||
case '\r': jb_puts(b, "\\r"); break;
|
||||
case '\t': jb_puts(b, "\\t"); break;
|
||||
case '"': jb_puts(b, "\\\""); p++; break;
|
||||
case '\\': jb_puts(b, "\\\\"); p++; break;
|
||||
case '\b': jb_puts(b, "\\b"); p++; break;
|
||||
case '\f': jb_puts(b, "\\f"); p++; break;
|
||||
case '\n': jb_puts(b, "\\n"); p++; break;
|
||||
case '\r': jb_puts(b, "\\r"); p++; break;
|
||||
case '\t': jb_puts(b, "\\t"); p++; break;
|
||||
default:
|
||||
if (c < 0x20) {
|
||||
char tmp[8];
|
||||
snprintf(tmp, sizeof(tmp), "\\u%04x", c);
|
||||
jb_puts(b, tmp);
|
||||
} else {
|
||||
p++;
|
||||
} else if (c < 0x80) {
|
||||
jb_putc(b, (char)c);
|
||||
p++;
|
||||
} else {
|
||||
/* Multi-byte UTF-8: validate sequence, pass through if valid,
|
||||
* escape as \u00xx if the start byte is invalid/orphaned. */
|
||||
int seq_len = 0;
|
||||
if ((c & 0xE0) == 0xC0) seq_len = 2;
|
||||
else if ((c & 0xF0) == 0xE0) seq_len = 3;
|
||||
else if ((c & 0xF8) == 0xF0) seq_len = 4;
|
||||
if (seq_len >= 2) {
|
||||
int valid = 1;
|
||||
for (int i = 1; i < seq_len; i++) {
|
||||
if ((p[i] & 0xC0) != 0x80) { valid = 0; break; }
|
||||
}
|
||||
if (valid) {
|
||||
for (int i = 0; i < seq_len; i++) jb_putc(b, (char)p[i]);
|
||||
p += seq_len;
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* Invalid start byte or truncated sequence — escape it */
|
||||
char tmp[8];
|
||||
snprintf(tmp, sizeof(tmp), "\\u%04x", c);
|
||||
jb_puts(b, tmp);
|
||||
p++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -6219,7 +6250,9 @@ static void engram_grow_edges(void) {
|
||||
static char* engram_new_id(void) {
|
||||
el_val_t v = uuid_new();
|
||||
const char* s = EL_CSTR(v);
|
||||
return el_strdup(s ? s : "");
|
||||
/* Persistent: node ids live in the global store; an arena (el_strdup) id is
|
||||
* freed at el_request_end(), corrupting the node after the creating request. */
|
||||
return el_strdup_persist(s ? s : "");
|
||||
}
|
||||
|
||||
/* Convert a node into an ElMap of its fields. */
|
||||
@@ -6314,12 +6347,17 @@ el_val_t engram_node_full(el_val_t content, el_val_t node_type, el_val_t label,
|
||||
const char* lb = EL_CSTR(label);
|
||||
const char* ti = EL_CSTR(tier);
|
||||
const char* tg = EL_CSTR(tags);
|
||||
n->content = el_strdup(c ? c : "");
|
||||
n->node_type = el_strdup(nt && *nt ? nt : "Memory");
|
||||
n->label = el_strdup(lb && *lb ? lb : (c ? engram_first_n_chars(c, 60) : ""));
|
||||
n->tier = el_strdup(ti && *ti ? ti : "Working");
|
||||
n->tags = el_strdup(tg ? tg : "");
|
||||
n->metadata = el_strdup("{}");
|
||||
/* Persistent (el_strdup_persist, NOT el_strdup): these strings are owned by the
|
||||
* persistent global node store. el_strdup tracks into the per-request arena, which
|
||||
* el_request_end() frees when the creating HTTP request completes — leaving the
|
||||
* stored node with dangling pointers (corrupted ids, "saved but never listed").
|
||||
* This is the root cause of the hallucinated/lost-saves class of bugs. */
|
||||
n->content = el_strdup_persist(c ? c : "");
|
||||
n->node_type = el_strdup_persist(nt && *nt ? nt : "Memory");
|
||||
n->label = el_strdup_persist(lb && *lb ? lb : (c ? engram_first_n_chars(c, 60) : ""));
|
||||
n->tier = el_strdup_persist(ti && *ti ? ti : "Working");
|
||||
n->tags = el_strdup_persist(tg ? tg : "");
|
||||
n->metadata = el_strdup_persist("{}");
|
||||
n->salience = engram_decode_score(salience);
|
||||
n->importance = engram_decode_score(importance);
|
||||
n->confidence = engram_decode_score(confidence);
|
||||
@@ -7262,13 +7300,48 @@ el_val_t engram_save(el_val_t path) {
|
||||
jb_putc(&b, '}');
|
||||
}
|
||||
jb_puts(&b, "]}");
|
||||
FILE* f = fopen(p, "wb");
|
||||
if (!f) { free(b.buf); return 0; }
|
||||
|
||||
/* --- Anti-clobber sparse-write floor (NTN engram clobber fix) ---------
|
||||
* Refuse to overwrite an existing populated snapshot with a drastically
|
||||
* smaller one. A bad boot that loaded only ~63 identity nodes must never
|
||||
* be able to clobber a healthy 5000+ node snapshot, regardless of the
|
||||
* upstream cause (genesis fallback, partial load, etc.). */
|
||||
{
|
||||
struct stat _st;
|
||||
if (stat(p, &_st) == 0 && _st.st_size > 200000 &&
|
||||
(uint64_t)b.len < (uint64_t)_st.st_size / 16) {
|
||||
fprintf(stderr,
|
||||
"[engram_save] REFUSED sparse write: new %zu bytes vs existing "
|
||||
"%lld bytes (< 1/16) — protecting snapshot %s\n",
|
||||
b.len, (long long)_st.st_size, p);
|
||||
free(b.buf);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* --- Atomic write: tmp + fsync + rename ------------------------------
|
||||
* Write to a sibling temp file, fsync it durable, then rename() over the
|
||||
* target. rename() is atomic on POSIX, so a concurrent reader (a booting
|
||||
* soul's engram_load) never observes a truncated or 0-byte snapshot —
|
||||
* which was the root of the genesis/clobber loop. */
|
||||
size_t _plen = strlen(p);
|
||||
char* _tmp = (char*)malloc(_plen + 5);
|
||||
if (!_tmp) { free(b.buf); return 0; }
|
||||
memcpy(_tmp, p, _plen);
|
||||
memcpy(_tmp + _plen, ".tmp", 5); /* includes NUL */
|
||||
|
||||
FILE* f = fopen(_tmp, "wb");
|
||||
if (!f) { free(_tmp); free(b.buf); return 0; }
|
||||
size_t w = fwrite(b.buf, 1, b.len, f);
|
||||
int wok = (w == b.len);
|
||||
if (wok) { fflush(f); fsync(fileno(f)); }
|
||||
fclose(f);
|
||||
int ok = (w == b.len);
|
||||
free(b.buf);
|
||||
return ok ? 1 : 0;
|
||||
|
||||
if (!wok) { unlink(_tmp); free(_tmp); return 0; }
|
||||
if (rename(_tmp, p) != 0) { unlink(_tmp); free(_tmp); return 0; }
|
||||
free(_tmp);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Helper: extract a string field from a JSON object substring. */
|
||||
|
||||
+43
-3
@@ -6,15 +6,55 @@
|
||||
//
|
||||
// Dependencies: runtime/string.el, runtime/json.el
|
||||
|
||||
// --- Validation (defense in depth) ---
|
||||
// el_val_t is an untyped machine word, so a wrong TYPE can't be caught here — but a
|
||||
// wrong VALUE can (a tier in the node_type slot, an empty/garbage string, an int, a
|
||||
// path, a model name, a cgi id). Reject loudly instead of silently writing junk.
|
||||
|
||||
// Returns true when `t` is exactly one of the node-type names accepted for
// engram nodes; any other string (including the empty string) yields false.
// Comparison is done with str_eq against each accepted name in turn.
fn engram_valid_node_type(t: String) -> Bool {
    return str_eq(t, "Memory")
        || str_eq(t, "Knowledge")
        || str_eq(t, "Belief")
        || str_eq(t, "Project")
        || str_eq(t, "Tag")
        || str_eq(t, "BacklogItem")
        || str_eq(t, "Artifact")
        || str_eq(t, "Conversation")
        || str_eq(t, "ExecutionContext")
        || str_eq(t, "InternalStateEvent")
        || str_eq(t, "Self")
        || str_eq(t, "Entity")
        || str_eq(t, "Process")
        || str_eq(t, "ConfigEntry")
        || str_eq(t, "Concept")
        || str_eq(t, "Imprint")
        || str_eq(t, "SessionSummary")
}
|
||||
|
||||
// Returns true when `t` is exactly one of the accepted tier names; every
// other string yields false. Comparison is done with str_eq, name by name.
fn engram_valid_tier(t: String) -> Bool {
    return str_eq(t, "Semantic")
        || str_eq(t, "Episodic")
        || str_eq(t, "Working")
        || str_eq(t, "Procedural")
        || str_eq(t, "Canonical")
        || str_eq(t, "Note")
        || str_eq(t, "Lesson")
}
|
||||
|
||||
// --- Node creation ---
|
||||
|
||||
// Create a node through the __engram_node primitive, but only when
// `node_type` passes engram_valid_node_type. On an invalid node_type the
// write is refused: a rejection line is printed with __println and the
// empty string is returned. Otherwise the primitive's result is returned
// unchanged.
fn engram_node(content: String, node_type: String, salience: Float) -> String {
    if engram_valid_node_type(node_type) {
        return __engram_node(content, node_type, salience)
    }
    __println("[engram] REJECTED node write — invalid node_type '" + node_type + "'")
    return ""
}
|
||||
|
||||
// Signature MUST match the C primitive __engram_node_full exactly (el_seed.h):
//   (content, node_type, label, salience, importance, confidence, tier, tags)
// The previous wrapper declared a stale 8-arg schema with wrong names AND types
// (sal:Float at the label slot, ts:Int at the tier slot). Because el_val_t is an
// untyped machine word, the EL compiler coerced caller args to those wrong param
// types and then forwarded them BY POSITION into the C function — so tier received
// an int, importance/confidence received strings, label received a float, etc.
// That is the field-corruption bug. Match the contract 1:1 — no coercion, no reorder.
//
// Behaviour: node_type is checked with engram_valid_node_type and tier with
// engram_valid_tier, in that order. The first failing check prints a rejection
// line with __println and returns the empty string without calling the
// primitive. When both checks pass, all eight arguments are forwarded unchanged
// and in order to __engram_node_full, whose result is returned.
fn engram_node_full(content: String, node_type: String, label: String,
                    salience: Float, importance: Float, confidence: Float,
                    tier: String, tags: String) -> String {
    if !engram_valid_node_type(node_type) {
        __println("[engram] REJECTED node write — invalid node_type '" + node_type + "' (label=" + label + ")")
        return ""
    }
    if !engram_valid_tier(tier) {
        __println("[engram] REJECTED node write — invalid tier '" + tier + "' (node_type=" + node_type + ", label=" + label + ")")
        return ""
    }
    return __engram_node_full(content, node_type, label, salience, importance, confidence, tier, tags)
}
|
||||
|
||||
// --- Node retrieval ---
|
||||
|
||||
Executable
+21
@@ -0,0 +1,21 @@
|
||||
#!/bin/sh
# build-soul-darwin.sh — replicate `elb` on macOS/arm64 with clang.
# Proven 2026-06-16: produces a Mach-O arm64 soul that boots and serves :7770.
# The official builder `elb` ships Linux-only (CI); this lets us build + test the
# darwin soul locally (e.g. to validate the atomic engram_save fix in isolation).
#
# Usage: scripts/build-soul-darwin.sh <path-to-neuron/dist> [output-binary]
#
# Steps: compile the runtime and every *.c module found in the dist directory
# into a scratch directory, then link all resulting objects into the output
# binary (default ./neuron). Any failing command aborts the build (set -e).
set -e
DIST="${1:?usage: build-soul-darwin.sh <neuron/dist dir> [out]}"
OUT="${2:-./neuron}"
RT="$(cd "$(dirname "$0")/.." && pwd)/lang/el-compiler/runtime"
B="$(mktemp -d)"
# Remove the scratch object directory on every exit path, including a failed
# compile or link aborted by `set -e`, so repeated builds do not pile up
# temporary directories.
trap 'rm -rf "$B"' EXIT
# elc-generated dist modules use C89-style implicit cross-module declarations that
# Apple clang rejects as errors by default; resolve at link, so downgrade them.
# NOTE: $CFLAGS is expanded unquoted below on purpose so it splits into separate
# flags; paths containing whitespace are therefore not supported.
CFLAGS="-Wno-implicit-function-declaration -Wno-implicit-int -Wno-int-conversion -I$B -I$DIST -I$RT"
cp "$RT/el_runtime.h" "$B/"
clang -c $CFLAGS "$RT/el_runtime.c" -o "$B/el_runtime.o"
for c in "$DIST"/*.c; do
    clang -c $CFLAGS "$c" -o "$B/$(basename "$c" .c).o"
done
# NOTE: link *.o once — do not also list el_runtime.o separately (duplicate symbols).
clang "$B"/*.o -o "$OUT" -lcurl -lm
echo "built $OUT"
|
||||
Reference in New Issue
Block a user