self-review 2026-05-15: wire engram_cosine_sim into activation scoring

engram_cosine_sim was defined but never called. Nodes have 768-dim
nomic-embed-text vectors. Now:
- engram_embed_query() embeds the query string once per activate() call
- engram_goal_bias() takes (qvec, qdim) and adds cosine-similarity bonus
  up to +0.6 when sim > 0.5 — semantic relevance now augments lexical bias
- engram_wm_count() exposes working-memory-active node count to EL
- el_runtime.h declares engram_wm_count for soul-daemon linking
This commit is contained in:
2026-05-15 08:37:38 -05:00
parent 913a98329a
commit 9bcd68fbca
2 changed files with 90 additions and 6 deletions
+89 -6
View File
@@ -5532,8 +5532,9 @@ void el_cgi_init(el_val_t name, el_val_t dharma_id, el_val_t principal,
* 6. For each inhibitory edge where source has background_activation > 0:
* inhibition[target] = max(bg[source] * e->weight)
* 7. For each background-activated node:
* raw_wm = bg * goal_bias(node, query) * confidence
* raw_wm = bg * goal_bias(node, query, qvec, qdim) * confidence
* * (1 - (1 - INHIBITION_FACTOR) * inhibition)
* goal_bias includes cosine similarity when node + query embeddings exist.
* 8. Per-type threshold gate: raw_wm >= type_threshold promoted.
* Safety/DharmaSelf: 0.05 Canonical: 0.15 Lesson: 0.25
* Belief/Entity: 0.30 Note/Memory/Working: 0.40
@@ -5984,6 +5985,7 @@ static int engram_keys_init(void);
static int engram_write_binary(const char* path);
static int engram_load_binary(const char* path);
static void engram_embed_node(EngramNode* n);
/* Embeds `text`; returns a heap-allocated vector the caller must free (NULL on
 * failure) and stores the element count in *dim_out. */
static float* engram_embed_query(const char* text, uint32_t* dim_out);
/* Cosine similarity of two vectors of `dim` floats each; callers are expected
 * to pass vectors of equal dimension. */
static float engram_cosine_sim(const float* a, const float* b, uint32_t dim);
static void engram_checkpoint(void);
static void engram_emit_ise_internal(const char* content, const char* label);
@@ -6633,9 +6635,10 @@ static double engram_temporal_proximity_bonus(int64_t node_created,
*/
/* Compute goal-state bias multiplier for a node given the query.
* Returns a value in [0.3, 2.0]. This is a lightweight heuristic
* a production implementation may use LLM-derived intent classification. */
static double engram_goal_bias(const EngramNode* n, const char* query) {
* Returns a value in [0.3, 2.0]. Combines lexical heuristics with cosine
* similarity when both the node and query have embedding vectors. */
static double engram_goal_bias(const EngramNode* n, const char* query,
const float* qvec, uint32_t qdim) {
if (!query || !*query) return 1.0;
double bias = 1.0;
/* Direct lexical overlap: node content/label/tags share text with query. */
@@ -6643,6 +6646,16 @@ static double engram_goal_bias(const EngramNode* n, const char* query) {
istr_contains(n->tags, query)) {
bias += 0.5;
}
/* Semantic similarity via embedding cosine sim.
* When both embeddings are present, add up to +0.6 bonus scaled linearly
* from sim=0.5 (neutral) to sim=1.0 (identical). Below 0.5 is noise;
* we clamp to zero bonus there so lexical match still dominates. */
if (qvec && qdim > 0 && n->embedding && n->embedding_dim == qdim) {
float sim = engram_cosine_sim(n->embedding, qvec, qdim);
if (sim > 0.5f) {
bias += (double)(sim - 0.5f) * 1.2; /* max +0.6 at sim=1.0 */
}
}
/* Node-type resonance with query intent. */
int technical_query = istr_contains(query, "code") ||
istr_contains(query, "function") ||
@@ -6689,6 +6702,11 @@ el_val_t engram_activate(el_val_t query, el_val_t depth) {
int64_t now_ms = engram_now_ms();
/* Embed the query string once so goal_bias can use cosine similarity.
* Falls back gracefully to NULL/0 if Ollama is unavailable. */
uint32_t qdim = 0;
float* qvec = engram_embed_query(q, &qdim);
/* Per-node layer-1 tracking. */
double* best_bg = calloc((size_t)g->node_count, sizeof(double));
int64_t* best_hops = calloc((size_t)g->node_count, sizeof(int64_t));
@@ -6906,8 +6924,9 @@ el_val_t engram_activate(el_val_t query, el_val_t depth) {
EngramNode* n = &g->nodes[i];
/* Per-type threshold: safety nodes break through more easily. */
double type_threshold = engram_type_threshold(n->node_type, n->tier);
/* Goal bias weights the node's relevance to current intent. */
double bias = engram_goal_bias(n, q);
/* Goal bias weights the node's relevance to current intent.
* Pass query embedding (may be NULL if Ollama unavailable). */
double bias = engram_goal_bias(n, q, qvec, qdim);
/* Raw working memory score. */
double raw_wm = best_bg[i] * bias * n->confidence;
/* Apply inhibitory suppression. Full inhibition → scale by factor. */
@@ -7103,6 +7122,7 @@ el_val_t engram_activate(el_val_t query, el_val_t depth) {
}
free(best_bg); free(best_hops); free(reached);
free(seeds); free(fr); free(inhibition); free(wm_weights); free(results);
free(qvec);
return out;
}
@@ -7287,6 +7307,59 @@ static uint32_t engram_parse_float_array(const char* json, float** out) {
return count;
}
/* Embed an arbitrary text string via Ollama nomic-embed-text.
 *
 * text     NUL-terminated query string. Only the first 2048 bytes are sent;
 *          the cut is moved back to a UTF-8 code point boundary when needed.
 * dim_out  Receives the number of floats in the returned vector; set to 0 on
 *          every failure path. Must not be NULL.
 *
 * Returns a heap-allocated float array (caller must free), or NULL on failure
 * (NULL/empty input, allocation failure, transport error, or a response
 * without a parsable "embedding" array).
 *
 * Performs a synchronous HTTP POST to the local Ollama endpoint, bounded by a
 * 5 second timeout. */
static float* engram_embed_query(const char* text, uint32_t* dim_out) {
    enum { QUERY_MAX_BYTES = 2048 };
    static const char prefix[] = "{\"model\":\"nomic-embed-text\",\"prompt\":\"";
    static const char suffix[] = "\"}";

    if (!dim_out) return NULL;
    *dim_out = 0;
    if (!text || !*text) return NULL;

    size_t tlen = strlen(text);
    if (tlen > QUERY_MAX_BYTES) {
        tlen = QUERY_MAX_BYTES;
        /* text[tlen] is the first byte dropped. If it is a UTF-8 continuation
         * byte (10xxxxxx) the cut falls inside a multi-byte sequence; step
         * back (at most 3 bytes, the longest possible tail) so the whole
         * sequence is dropped instead of sending a broken one. If no lead
         * byte is found the input is not valid UTF-8 and the cut is kept. */
        size_t cut = tlen;
        for (int k = 0; k < 3 && ((unsigned char)text[cut] & 0xC0) == 0x80; k++) {
            cut--;
        }
        if (((unsigned char)text[cut] & 0xC0) != 0x80) tlen = cut;
    }

    /* Worst case every input byte becomes a 6-byte \u00XX escape. */
    char* body = malloc(tlen * 6 + sizeof prefix + sizeof suffix);
    if (!body) return NULL;

    char* bp = body;
    memcpy(bp, prefix, sizeof prefix - 1);
    bp += sizeof prefix - 1;

    for (size_t i = 0; i < tlen; i++) {
        unsigned char c = (unsigned char)text[i];
        switch (c) {
        case '"':  *bp++ = '\\'; *bp++ = '"';  break;
        case '\\': *bp++ = '\\'; *bp++ = '\\'; break;
        case '\n': *bp++ = '\\'; *bp++ = 'n';  break;
        case '\r': *bp++ = '\\'; *bp++ = 'r';  break;
        case '\t': *bp++ = '\\'; *bp++ = 't';  break;
        default:
            if (c < 0x20) {
                /* JSON forbids raw control characters inside strings. */
                bp += sprintf(bp, "\\u%04x", (unsigned)c);
            } else {
                *bp++ = (char)c;
            }
            break;
        }
    }
    memcpy(bp, suffix, sizeof suffix); /* includes the terminating NUL */

    CURL* curl = curl_easy_init();
    if (!curl) { free(body); return NULL; }

    char* resp = NULL;
    struct curl_slist* hdrs = NULL;
    hdrs = curl_slist_append(hdrs, "Content-Type: application/json");
    curl_easy_setopt(curl, CURLOPT_URL, "http://localhost:11434/api/embeddings");
    curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body);
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, hdrs);
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, engram_embed_write_cb);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &resp);
    curl_easy_setopt(curl, CURLOPT_TIMEOUT, 5L); /* short timeout — activation must stay fast */
    curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
    CURLcode rc = curl_easy_perform(curl);
    curl_slist_free_all(hdrs);
    curl_easy_cleanup(curl);
    free(body);
    if (rc != CURLE_OK || !resp) { free(resp); return NULL; }

    /* Locate the "embedding" key, then advance to the opening '[' of its
     * array (or to the end of the response if there is none). */
    const char* ep = strstr(resp, "\"embedding\"");
    if (!ep) { free(resp); return NULL; }
    ep += strlen("\"embedding\"");
    while (*ep && *ep != '[') ep++;

    float* vec = NULL;
    uint32_t dim = engram_parse_float_array(ep, &vec);
    free(resp);
    /* NOTE(review): if engram_parse_float_array can allocate *out while
     * returning 0, vec would leak here — confirm against its definition. */
    if (dim == 0) return NULL;
    *dim_out = dim;
    return vec;
}
static void engram_embed_node(EngramNode* n) {
if (!n || !n->content || !*n->content) return;
/* Build JSON body */
@@ -8508,6 +8581,16 @@ el_val_t engram_stats_json(void) {
return el_wrap_str(el_strdup(buf));
}
/* Count nodes currently in working memory (working_memory_weight > 0). */
el_val_t engram_wm_count(void) {
EngramStore* g = engram_get();
int64_t count = 0;
for (int64_t i = 0; i < g->node_count; i++) {
if (g->nodes[i].working_memory_weight > 0.0) count++;
}
return (el_val_t)count;
}
/* engram_list_layers_json — serialized counterpart of engram_list_layers.
* Returns a JSON array, sorted by activation_priority ascending. */
el_val_t engram_list_layers_json(void) {
@@ -618,6 +618,7 @@ el_val_t engram_scan_nodes_by_type_json(el_val_t node_type, el_val_t limit, el_
el_val_t engram_neighbors_json(el_val_t node_id, el_val_t max_depth, el_val_t direction);
el_val_t engram_activate_json(el_val_t query, el_val_t depth);
el_val_t engram_stats_json(void);
/* Number of nodes whose working_memory_weight is > 0, returned as an integer
 * cast to el_val_t. */
el_val_t engram_wm_count(void);
el_val_t engram_apply_decay_json(void);
el_val_t engram_list_layers_json(void);
/* engram_compile_layered_json — produce a prompt-ready text block split