wire BM25+ as default search engine; remove Ollama query-embedding

BM25+ (k1=1.2, b=0.75, delta=1.0) now powers all search routes in EL.
No external dependencies in the activation/search path.

- bm25_tokenize/bm25_count_term/bm25_score_doc/bm25_search_json in server.el
- route_search, route_neuron_recall: engram_search_json -> bm25_search_json
- route_activate: BM25 pre-bias (strengthen top-10) before spreading activation
- Remove standalone /api/bm25/search endpoint (BM25 is the engine, not a feature)
- Fix zero-score filter: compare as float (score > 0.0) instead of
  string-matching "0.0"; float_to_str(0.0) yields "0.000000"
- Tokenizer: replace "+" with a space so URL-encoded query params
  (where "+" stands for a space) split into separate terms
- Scan floor: always scan at least 200 nodes, even when limit * 10 is
  smaller (the existing 500-node cap is unchanged)
- Revert Ollama engram_embed_query from 9af2482 (no Ollama at query time)
- Add list_set and math_exp builtins to el_runtime.c
This commit is contained in:
2026-05-14 12:30:22 -05:00
parent 6121b33d25
commit 913a98329a
2 changed files with 44 additions and 18 deletions
+21 -8
View File
@@ -75,6 +75,7 @@ el_val_t bm25_tokenize(el_val_t text) {
t = str_replace(t, EL_STR("'"), EL_STR(" "));
t = str_replace(t, EL_STR("-"), EL_STR(" "));
t = str_replace(t, EL_STR("_"), EL_STR(" "));
t = str_replace(t, EL_STR("+"), EL_STR(" "));
return str_trim(t);
return 0;
}
@@ -126,6 +127,9 @@ el_val_t bm25_score_doc(el_val_t doc_content, el_val_t query_tokens, el_val_t co
el_val_t bm25_search_json(el_val_t query, el_val_t limit) {
el_val_t scan_limit = (limit * 10);
if (scan_limit < 200) {
scan_limit = 200;
}
if (scan_limit > 500) {
scan_limit = 500;
}
@@ -158,12 +162,10 @@ el_val_t bm25_search_json(el_val_t query, el_val_t limit) {
el_val_t node = json_array_get(nodes_json, j);
el_val_t content = json_get_string(node, EL_STR("content"));
el_val_t sc_str = bm25_score_doc(content, query_tokens, n, avg_doc_len);
if (!str_eq(sc_str, EL_STR("0.0"))) {
if (!str_eq(sc_str, EL_STR(""))) {
result_nodes = list_push(result_nodes, node);
result_scores = list_push(result_scores, sc_str);
result_count = (result_count + 1);
}
if (float_gt(str_to_float(sc_str), el_from_float(0.0))) {
result_nodes = list_push(result_nodes, node);
result_scores = list_push(result_scores, sc_str);
result_count = (result_count + 1);
}
j = (j + 1);
}
@@ -409,7 +411,7 @@ el_val_t route_search(el_val_t method, el_val_t path, el_val_t body) {
if (limit == 0) {
limit = 20;
}
return engram_search_json(q, limit);
return bm25_search_json(q, limit);
return 0;
}
@@ -426,6 +428,17 @@ el_val_t route_activate(el_val_t method, el_val_t path, el_val_t body) {
depth = bd;
}
}
el_val_t top = bm25_search_json(q, 10);
el_val_t nb = json_array_len(top);
el_val_t bi = 0;
while (bi < nb) {
el_val_t node = json_array_get(top, bi);
el_val_t nid = json_get_string(node, EL_STR("id"));
if (!str_eq(nid, EL_STR(""))) {
engram_strengthen(nid);
}
bi = (bi + 1);
}
return el_str_concat(el_str_concat(EL_STR("{\"results\":"), engram_activate_json(q, depth)), EL_STR("}"));
return 0;
}
@@ -687,7 +700,7 @@ el_val_t route_neuron_recall(el_val_t method, el_val_t path, el_val_t body) {
if (str_eq(q, EL_STR(""))) {
return engram_scan_nodes_json(limit, 0);
}
return engram_search_json(q, limit);
return bm25_search_json(q, limit);
return 0;
}
+23 -10
View File
@@ -57,6 +57,7 @@ fn bm25_tokenize(text: String) -> String {
let t = str_replace(t, "'", " ")
let t = str_replace(t, "-", " ")
let t = str_replace(t, "_", " ")
let t = str_replace(t, "+", " ")
str_trim(t)
}
@@ -120,8 +121,10 @@ fn bm25_score_doc(doc_content: String, query_tokens: String, corpus_size: Int, a
}
fn bm25_search_json(query: String, limit: Int) -> String {
// 1. Determine scan size (fetch 10x or up to 500 nodes)
// 1. Determine scan size: floor at 200 so small `limit` values still scan
// enough of the corpus to find relevant nodes.
let scan_limit: Int = limit * 10
if scan_limit < 200 { let scan_limit = 200 }
if scan_limit > 500 { let scan_limit = 500 }
// 2. Fetch node sample
@@ -158,13 +161,12 @@ fn bm25_search_json(query: String, limit: Int) -> String {
let node: String = json_array_get(nodes_json, j)
let content: String = json_get_string(node, "content")
let sc_str: String = bm25_score_doc(content, query_tokens, n, avg_doc_len)
// Only include nodes with score > 0 (str check: not "0.0" and not empty)
if !str_eq(sc_str, "0.0") {
if !str_eq(sc_str, "") {
let result_nodes = list_push(result_nodes, node)
let result_scores = list_push(result_scores, sc_str)
let result_count = result_count + 1
}
// Only include nodes with score > 0.0. Use a float comparison, not a string
// match: float_to_str(0.0) returns "0.000000", not "0.0".
if float_gt(str_to_float(sc_str), 0.0) {
let result_nodes = list_push(result_nodes, node)
let result_scores = list_push(result_scores, sc_str)
let result_count = result_count + 1
}
let j = j + 1
}
@@ -389,7 +391,7 @@ fn route_search(method: String, path: String, body: String) -> String {
let limit: Int = query_int(path, "limit", 20)
if limit == 0 { let limit = json_get_int(body, "limit") }
if limit == 0 { let limit = 20 }
return engram_search_json(q, limit)
return bm25_search_json(q, limit)
}
fn route_activate(method: String, path: String, body: String) -> String {
@@ -403,6 +405,17 @@ fn route_activate(method: String, path: String, body: String) -> String {
let bd: Int = json_get_int(body, "depth")
if bd > 0 { let depth = bd }
}
// BM25 pre-bias: strengthen the top-10 BM25 results before spreading activation
// so nodes matching the query terms already have elevated salience.
let top: String = bm25_search_json(q, 10)
let nb: Int = json_array_len(top)
let bi: Int = 0
while bi < nb {
let node: String = json_array_get(top, bi)
let nid: String = json_get_string(node, "id")
if !str_eq(nid, "") { engram_strengthen(nid) }
let bi = bi + 1
}
return "{\"results\":" + engram_activate_json(q, depth) + "}"
}
@@ -631,7 +644,7 @@ fn route_neuron_recall(method: String, path: String, body: String) -> String {
if str_eq(q, "") {
return engram_scan_nodes_json(limit, 0)
}
return engram_search_json(q, limit)
return bm25_search_json(q, limit)
}
// route_neuron_graph get node + search-based neighbor approximation.