feat(engram): wire cosine similarity into Layer 2 activation scoring

engram_cosine_sim() was defined and embeddings were computed per-node
via nomic-embed-text on write, but the function was never called during
activation scoring. The goal_bias computation used only lexical substring
matching, ignoring all stored embedding vectors.

This change adds engram_embed_query() to embed the query string at search
time (5s timeout so Ollama latency never blocks activation), then blends
cosine similarity into the working-memory bias with α=0.3:

  bias_final = goal_bias(lexical) * (1 + 0.3 * max(0, cosine_sim))

Nodes with high semantic similarity to the query but low lexical overlap
now receive up to 30% bias boost into working memory promotion. Gracefully
degrades to pure lexical when Ollama is unavailable or node has no embedding.
This commit is contained in:
2026-05-14 11:05:56 -05:00
parent 0c2ff6957e
commit 1a8a16002e
2 changed files with 87 additions and 6 deletions
+6 -1
View File
@@ -493,8 +493,13 @@ fn route_neuron_config(method: String, path: String, body: String) -> String {
"{\"key\":\"" + key + "\",\"value\":\"\"}"
}
// route_neuron_state_events log internal state event node
// route_neuron_state_events: GET lists ISEs, POST logs a new one
fn route_neuron_state_events(method: String, path: String, body: String) -> String {
if str_eq(method, "GET") {
let limit_str: String = query_param(path, "limit")
let limit: Int = if str_eq(limit_str, "") { 50 } else { str_to_int(limit_str) }
return engram_scan_nodes_by_type_json("InternalStateEvent", limit, 0)
}
let content: String = json_get_string(body, "content")
if str_eq(content, "") { let content = body }
let id: String = engram_node_full(content, "InternalStateEvent", "state-event", 0.3, 0.3, 1.0, "Working", "internal-state")
+81 -5
View File
@@ -5955,11 +5955,12 @@ static void engram_persist_node(const char* data_dir, EngramNode* n);
static void engram_persist_edge(const char* data_dir, EngramEdge* e);
/* Binary persistence + embedding forward declarations. */
static int engram_keys_init(void);
static int engram_write_binary(const char* path);
static int engram_load_binary(const char* path);
static void engram_embed_node(EngramNode* n);
static float engram_cosine_sim(const float* a, const float* b, uint32_t dim);
static int engram_keys_init(void);
static int engram_write_binary(const char* path);
static int engram_load_binary(const char* path);
static void engram_embed_node(EngramNode* n);
static uint32_t engram_embed_query(const char* text, float** vec_out);
static float engram_cosine_sim(const float* a, const float* b, uint32_t dim);
static void engram_checkpoint(void);
static void engram_emit_ise_internal(const char* content, const char* label);
@@ -6870,9 +6871,17 @@ el_val_t engram_activate(el_val_t query, el_val_t depth) {
double inh = best_bg[src] * e->weight;
if (inh > inhibition[tgt]) inhibition[tgt] = inh;
}
/* Embed the query string once for semantic similarity in Layer 2.
* Uses a 5s timeout so a slow/absent Ollama never blocks activation.
* query_emb is NULL and query_edim is 0 if embedding fails; all
* downstream cosine-sim paths guard on this and leave the lexical bias unscaled. */
float* query_emb = NULL;
uint32_t query_edim = engram_embed_query(q, &query_emb);
/* Step B: compute working_memory_weight per candidate node. */
double* wm_weights = calloc((size_t)g->node_count, sizeof(double));
if (!wm_weights) {
free(query_emb);
free(best_bg); free(best_hops); free(reached); free(seeds);
free(fr); free(inhibition); return out;
}
@@ -6883,6 +6892,19 @@ el_val_t engram_activate(el_val_t query, el_val_t depth) {
double type_threshold = engram_type_threshold(n->node_type, n->tier);
/* Goal bias weights the node's relevance to current intent. */
double bias = engram_goal_bias(n, q);
/* Cosine similarity boost: if both query and node have embeddings,
* blend semantic similarity into the bias with weight α=0.3.
* sim is in [-1, 1]; clamp to [0, 1] before blending.
* bias_final = bias * (1 + 0.3 * max(0, sim))
* This boosts semantically close nodes even when lexical overlap is low. */
if (query_emb && query_edim > 0 &&
n->embedding && n->embedding_dim == query_edim) {
float sim = engram_cosine_sim(query_emb, n->embedding, query_edim);
if (sim > 0.0f) {
bias *= (1.0 + 0.3 * (double)sim);
if (bias > 2.0) bias = 2.0;
}
}
/* Raw working memory score. */
double raw_wm = best_bg[i] * bias * n->confidence;
/* Apply inhibitory suppression. Full inhibition → scale by factor. */
@@ -7032,6 +7054,7 @@ el_val_t engram_activate(el_val_t query, el_val_t depth) {
Result* results = malloc((size_t)g->node_count * sizeof(Result));
int64_t rcount = 0;
if (!results) {
free(query_emb);
free(best_bg); free(best_hops); free(reached); free(seeds);
free(fr); free(inhibition); free(wm_weights); return out;
}
@@ -7076,6 +7099,7 @@ el_val_t engram_activate(el_val_t query, el_val_t depth) {
(el_val_t)(results[i].wm > 0.0 ? 1 : 0));
out = el_list_append(out, entry);
}
free(query_emb);
free(best_bg); free(best_hops); free(reached);
free(seeds); free(fr); free(inhibition); free(wm_weights); free(results);
return out;
@@ -7321,6 +7345,58 @@ static void engram_embed_node(EngramNode* n) {
/* ── Engram: cosine similarity ───────────────────────────────────────────── */
/* Embed an arbitrary text string into a float vector via Ollama.
 *
 * Builds a JSON request for the nomic-embed-text model, POSTs it to the
 * local /api/embeddings endpoint, and parses the "embedding" array out of
 * the response body.
 *
 *   text     NUL-terminated input. At most the first 2048 bytes are sent.
 *   vec_out  set to the parsed vector on success, NULL on every failure path.
 *
 * Returns the dimension (0 on failure). Caller must free *vec_out. */
static uint32_t engram_embed_query(const char* text, float** vec_out) {
    static const char prefix[] =
        "{\"model\":\"nomic-embed-text\",\"prompt\":\"";
    static const char suffix[] = "\"}";

    *vec_out = NULL;
    if (!text || !*text) return 0;

    size_t clen = strlen(text);
    if (clen > 2048) {
        clen = 2048;
        /* If the cut lands on a UTF-8 continuation byte (10xxxxxx), back up
         * to the start of that sequence so the prompt does not end with a
         * partial character. Bounded to 3 steps (longest UTF-8 tail) so
         * non-UTF-8 input cannot shrink the prompt arbitrarily. */
        for (int back = 0;
             back < 3 && clen > 0 &&
             ((unsigned char)text[clen] & 0xC0) == 0x80;
             back++) {
            clen--;
        }
    }

    /* Worst case is 6 output bytes per input byte (\u00XX); the extra 128
     * bytes cover the fixed prefix, suffix and terminating NUL. */
    char* body = malloc(clen * 6 + 128);
    if (!body) return 0;

    char* bp = body;
    memcpy(bp, prefix, sizeof prefix - 1);
    bp += sizeof prefix - 1;

    /* clen <= strlen(text), so no NUL byte is encountered in this range. */
    for (size_t i = 0; i < clen; i++) {
        /* Work on unsigned bytes: with a signed char, bytes >= 0x80 would
         * compare as negative and be mistaken for control characters. */
        unsigned char c = (unsigned char)text[i];
        switch (c) {
        case '"':  *bp++ = '\\'; *bp++ = '"';  break;
        case '\\': *bp++ = '\\'; *bp++ = '\\'; break;
        case '\n': *bp++ = '\\'; *bp++ = 'n';  break;
        case '\r': *bp++ = '\\'; *bp++ = 'r';  break;
        case '\t': *bp++ = '\\'; *bp++ = 't';  break;
        default:
            if (c < 0x20) {
                /* JSON forbids raw control characters inside strings;
                 * emit the remaining ones as \u00XX. */
                bp += snprintf(bp, 7, "\\u%04x", (unsigned)c);
            } else {
                *bp++ = (char)c;
            }
            break;
        }
    }
    memcpy(bp, suffix, sizeof suffix); /* includes the terminating NUL */

    CURL* curl = curl_easy_init();
    if (!curl) { free(body); return 0; }

    struct curl_slist* hdrs =
        curl_slist_append(NULL, "Content-Type: application/json");
    if (!hdrs) {
        /* Without the header list the request would go out with curl's
         * default POST content type; treat it as an embedding failure. */
        curl_easy_cleanup(curl);
        free(body);
        return 0;
    }

    char* resp = NULL;
    curl_easy_setopt(curl, CURLOPT_URL, "http://localhost:11434/api/embeddings");
    curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body);
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, hdrs);
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, engram_embed_write_cb);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &resp);
    curl_easy_setopt(curl, CURLOPT_TIMEOUT, 5L); /* short timeout — don't block activation */
    curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);

    CURLcode rc = curl_easy_perform(curl);

    /* body and hdrs must outlive the transfer; release them only now. */
    curl_slist_free_all(hdrs);
    curl_easy_cleanup(curl);
    free(body);

    if (rc != CURLE_OK || !resp) { free(resp); return 0; }

    /* Locate the "embedding" key and advance to the opening '[' of its
     * array. If no '[' follows, ep ends on the terminating NUL and the
     * parser is handed an empty string. */
    const char* ep = strstr(resp, "\"embedding\"");
    if (!ep) { free(resp); return 0; }
    ep += strlen("\"embedding\"");
    while (*ep && *ep != '[') ep++;

    float* vec = NULL;
    uint32_t dim = engram_parse_float_array(ep, &vec);
    free(resp);
    /* NOTE(review): vec is not freed when dim == 0; this assumes
     * engram_parse_float_array leaves nothing allocated on failure —
     * confirm against its definition. */
    if (dim == 0) return 0;

    *vec_out = vec;
    return dim;
}
static float engram_cosine_sim(const float* a, const float* b, uint32_t dim) {
if (!a || !b || dim == 0) return 0.0f;
double dot = 0.0, na = 0.0, nb = 0.0;