feat(engram): wire cosine similarity into Layer 2 activation scoring

engram_cosine_sim() was defined and embeddings were computed per-node via nomic-embed-text on write, but the function was never called during activation scoring. The goal_bias computation used only lexical substring matching, ignoring all stored embedding vectors. This change adds engram_embed_query() to embed the query string at search time (with a 5s timeout, so Ollama latency delays activation by at most about 5 seconds), then blends cosine similarity into the working-memory bias with α=0.3: bias_final = goal_bias(lexical) * (1 + 0.3 * max(0, cosine_sim)), capped at 2.0 whenever the boost is applied. Nodes with high semantic similarity to the query but low lexical overlap now receive up to a 30% bias boost toward working-memory promotion. Scoring gracefully degrades to pure lexical when Ollama is unavailable, when the node has no embedding, or when the node and query embedding dimensions differ.
2026-05-14 11:05:56 -05:00
parent 0c2ff6957e
commit 1a8a16002e
2 changed files with 87 additions and 6 deletions
@@ -493,8 +493,13 @@ fn route_neuron_config(method: String, path: String, body: String) -> String {
    "{\"key\":\"" + key + "\",\"value\":\"\"}"
 }

-// route_neuron_state_events — log internal state event node
+// route_neuron_state_events — GET lists ISEs, POST logs a new one
 fn route_neuron_state_events(method: String, path: String, body: String) -> String {
+    if str_eq(method, "GET") {
+        let limit_str: String = query_param(path, "limit")
+        let limit: Int = if str_eq(limit_str, "") { 50 } else { str_to_int(limit_str) }
+        return engram_scan_nodes_by_type_json("InternalStateEvent", limit, 0)
+    }
    let content: String = json_get_string(body, "content")
    if str_eq(content, "") { let content = body }
    let id: String = engram_node_full(content, "InternalStateEvent", "state-event", 0.3, 0.3, 1.0, "Working", "internal-state")
@@ -5955,11 +5955,12 @@ static void engram_persist_node(const char* data_dir, EngramNode* n);
 static void engram_persist_edge(const char* data_dir, EngramEdge* e);

 /* Binary persistence + embedding forward declarations. */
-static int   engram_keys_init(void);
-static int   engram_write_binary(const char* path);
-static int   engram_load_binary(const char* path);
-static void  engram_embed_node(EngramNode* n);
-static float engram_cosine_sim(const float* a, const float* b, uint32_t dim);
+static int      engram_keys_init(void);
+static int      engram_write_binary(const char* path);
+static int      engram_load_binary(const char* path);
+static void     engram_embed_node(EngramNode* n);
+static uint32_t engram_embed_query(const char* text, float** vec_out);
+static float    engram_cosine_sim(const float* a, const float* b, uint32_t dim);
 static void  engram_checkpoint(void);
 static void  engram_emit_ise_internal(const char* content, const char* label);

@@ -6870,9 +6871,17 @@ el_val_t engram_activate(el_val_t query, el_val_t depth) {
        double inh = best_bg[src] * e->weight;
        if (inh > inhibition[tgt]) inhibition[tgt] = inh;
    }
+    /* Embed the query string once for semantic similarity in Layer 2.
+     * The request carries a 5s timeout, so a slow/absent Ollama delays
+     * activation by at most about 5s. query_emb is NULL and query_edim is 0
+     * if embedding fails; the cosine-sim code guards on this and leaves the lexical bias unchanged. */
+    float*   query_emb  = NULL;
+    uint32_t query_edim = engram_embed_query(q, &query_emb);
+
    /* Step B: compute working_memory_weight per candidate node. */
    double* wm_weights = calloc((size_t)g->node_count, sizeof(double));
    if (!wm_weights) {
+        free(query_emb);
        free(best_bg); free(best_hops); free(reached); free(seeds);
        free(fr); free(inhibition); return out;
    }
@@ -6883,6 +6892,19 @@ el_val_t engram_activate(el_val_t query, el_val_t depth) {
        double type_threshold = engram_type_threshold(n->node_type, n->tier);
        /* Goal bias weights the node's relevance to current intent. */
        double bias = engram_goal_bias(n, q);
+        /* Cosine similarity boost: if both query and node have embeddings of
+         * the same dimension, blend semantic similarity into the bias (α=0.3).
+         * sim ∈ [-1, 1]; non-positive sim leaves the bias untouched, otherwise
+         * bias_final = min(2.0, bias * (1 + 0.3 * sim)); the cap applies only here.
+         * This boosts semantically close nodes even when lexical overlap is low. */
+        if (query_emb && query_edim > 0 &&
+            n->embedding && n->embedding_dim == query_edim) {
+            float sim = engram_cosine_sim(query_emb, n->embedding, query_edim);
+            if (sim > 0.0f) {
+                bias *= (1.0 + 0.3 * (double)sim);
+                if (bias > 2.0) bias = 2.0;
+            }
+        }
        /* Raw working memory score. */
        double raw_wm = best_bg[i] * bias * n->confidence;
        /* Apply inhibitory suppression. Full inhibition → scale by factor. */
@@ -7032,6 +7054,7 @@ el_val_t engram_activate(el_val_t query, el_val_t depth) {
    Result* results = malloc((size_t)g->node_count * sizeof(Result));
    int64_t rcount = 0;
    if (!results) {
+        free(query_emb);
        free(best_bg); free(best_hops); free(reached); free(seeds);
        free(fr); free(inhibition); free(wm_weights); return out;
    }
@@ -7076,6 +7099,7 @@ el_val_t engram_activate(el_val_t query, el_val_t depth) {
                           (el_val_t)(results[i].wm > 0.0 ? 1 : 0));
        out = el_list_append(out, entry);
    }
+    free(query_emb);
    free(best_bg); free(best_hops); free(reached);
    free(seeds); free(fr); free(inhibition); free(wm_weights); free(results);
    return out;
@@ -7321,6 +7345,58 @@ static void engram_embed_node(EngramNode* n) {

 /* ── Engram: cosine similarity ───────────────────────────────────────────── */

+/* Embed an arbitrary text string into a float vector via Ollama.
+ *
+ * Builds the JSON request {"model":"nomic-embed-text","prompt":"<text>"},
+ * POSTs it to http://localhost:11434/api/embeddings with a 5-second total
+ * timeout, and parses the "embedding" array out of the response.
+ *
+ * text    - NUL-terminated input. At most the first 2048 bytes are sent;
+ *           truncation backs up to a UTF-8 character boundary.
+ * vec_out - set to NULL on entry; receives the parsed vector on success.
+ *
+ * Returns the vector dimension, or 0 on any failure (NULL/empty text,
+ * allocation failure, curl error, missing or empty "embedding" array).
+ * On success the caller must free *vec_out. */
+static uint32_t engram_embed_query(const char* text, float** vec_out) {
+    *vec_out = NULL;
+    if (!text || !*text) return 0;
+    size_t clen = strlen(text);
+    if (clen > 2048) {
+        clen = 2048;
+        /* text[clen] is the first dropped byte. If it is a UTF-8 continuation
+         * byte (10xxxxxx) the cut falls inside a multi-byte character, so back
+         * up to drop that character whole. Sequences are at most 4 bytes, so
+         * 3 steps suffice; the bound also keeps malformed input from
+         * shrinking the prompt to nothing. */
+        for (int k = 0; k < 3 && ((unsigned char)text[clen] & 0xC0) == 0x80; k++) {
+            clen--;
+        }
+    }
+    /* Worst case every byte becomes a 6-byte \u00XX escape; 128 covers the
+     * fixed JSON prefix/suffix and the terminating NUL. */
+    char* body = malloc(clen * 6 + 128);
+    if (!body) return 0;
+    char* bp = body;
+    bp += sprintf(bp, "{\"model\":\"nomic-embed-text\",\"prompt\":\"");
+    const char* end = text + clen;
+    for (const char* cp = text; cp < end; cp++) {
+        unsigned char c = (unsigned char)*cp;
+        if (c == '"') { *bp++ = '\\'; *bp++ = '"'; }
+        else if (c == '\\') { *bp++ = '\\'; *bp++ = '\\'; }
+        else if (c == '\n') { *bp++ = '\\'; *bp++ = 'n'; }
+        else if (c == '\r') { *bp++ = '\\'; *bp++ = 'r'; }
+        else if (c == '\t') { *bp++ = '\\'; *bp++ = 't'; }
+        /* JSON forbids raw control characters inside strings; emit the
+         * remaining ones as \u00XX so the request stays parseable. */
+        else if (c < 0x20) { bp += snprintf(bp, 7, "\\u%04x", (unsigned)c); }
+        else { *bp++ = (char)c; }
+    }
+    sprintf(bp, "\"}");
+    CURL* curl = curl_easy_init();
+    if (!curl) { free(body); return 0; }
+    char* resp = NULL;
+    struct curl_slist* hdrs = NULL;
+    hdrs = curl_slist_append(hdrs, "Content-Type: application/json");
+    curl_easy_setopt(curl, CURLOPT_URL, "http://localhost:11434/api/embeddings");
+    curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body);
+    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, hdrs);
+    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, engram_embed_write_cb);
+    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &resp);
+    curl_easy_setopt(curl, CURLOPT_TIMEOUT, 5L);  /* bounds the whole request to 5s */
+    curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
+    CURLcode rc = curl_easy_perform(curl);
+    /* The request buffers are no longer needed once the transfer is done. */
+    curl_slist_free_all(hdrs);
+    curl_easy_cleanup(curl);
+    free(body);
+    if (rc != CURLE_OK || !resp) { free(resp); return 0; }
+    /* Locate the "embedding" key, then advance to the '[' opening its array. */
+    const char* ep = strstr(resp, "\"embedding\"");
+    if (!ep) { free(resp); return 0; }
+    ep += strlen("\"embedding\"");
+    while (*ep && *ep != '[') ep++;
+    float* vec = NULL;
+    uint32_t dim = engram_parse_float_array(ep, &vec);
+    free(resp);
+    if (dim == 0) return 0;
+    *vec_out = vec;
+    return dim;
+}
+
 static float engram_cosine_sim(const float* a, const float* b, uint32_t dim) {
    if (!a || !b || dim == 0) return 0.0f;
    double dot = 0.0, na = 0.0, nb = 0.0;