Add Ollama provider, portable memory, cultivation digest, refugee importer, GLM-OCR spike
- P0: unified soul binary with engram_node_full fix, read-back-verify, search fix
- P0: move API keys from plaintext plists to macOS Keychain
- P0: fix MCP backend URL (port 8742 → 7770)
- P1.6: memory-export/import scripts (AES-256-CBC, versioned .neuronmem format)
- P1.7: nightly cultivation digest with sharpness metric (launchd at 23:55)
- P2.10: Ollama provider in agentic loop (SOUL_LLM_PROVIDER=ollama)
- P3.12: refugee importer for ChatGPT/Screenpipe/generic formats
- P3.13: GLM-OCR spike — SHIP IT (mlx-vlm, 1.59GB, photo-to-memory.sh)
This commit is contained in:
+1
-2
@@ -679,8 +679,7 @@ el_val_t handle_chat(el_val_t body) {
|
||||
el_val_t thread_snip = ({ el_val_t _if_result_112 = 0; if ((str_len(last_content) > 250)) { _if_result_112 = (str_slice(last_content, 0, 250)); } else { _if_result_112 = (last_content); } _if_result_112; });
|
||||
el_val_t activation_seed = ({ el_val_t _if_result_113 = 0; if (!str_eq(thread_snip, EL_STR(""))) { _if_result_113 = (el_str_concat(el_str_concat(thread_snip, EL_STR(" ")), message)); } else { _if_result_113 = (message); } _if_result_113; });
|
||||
el_val_t affective_prefix = EL_NULL;
|
||||
EL_NULL;
|
||||
Int = time_now();
|
||||
el_val_t aff_now_ts = time_now();
|
||||
el_val_t aff_cutoff = (aff_now_ts - 259200);
|
||||
el_val_t boot_aff = state_get(EL_STR("soul_affective_context"));
|
||||
el_val_t has_boot_aff = !str_eq(boot_aff, EL_STR(""));
|
||||
|
||||
+34
-24028
File diff suppressed because it is too large
Load Diff
+3
-3
@@ -1,7 +1,7 @@
|
||||
// auto-generated by elc --emit-header — do not edit
|
||||
extern fn sem_get(json: String, key: String) -> String
|
||||
extern fn generate_frame(frame: Any) -> String
|
||||
extern fn generate_frame_lang(frame: Any, lang_code: String) -> String
|
||||
extern fn build_form_from_json(semantic_form_json: String, lang_code: String) -> Any
|
||||
extern fn generate_frame(frame: [String]) -> String
|
||||
extern fn generate_frame_lang(frame: [String], lang_code: String) -> String
|
||||
extern fn build_form_from_json(semantic_form_json: String, lang_code: String) -> [String]
|
||||
extern fn generate(semantic_form_json: String) -> String
|
||||
extern fn generate_lang(semantic_form_json: String, lang_code: String) -> String
|
||||
|
||||
-5
@@ -656,8 +656,3 @@ el_val_t generate_tree(el_val_t rule_id_str, el_val_t slots) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int _argc, char** _argv) {
|
||||
el_runtime_init_args(_argc, _argv);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
+28
-28
@@ -1,22 +1,22 @@
|
||||
// auto-generated by elc --emit-header - do not edit
|
||||
extern fn slots_get(slots: Any, key: String) -> String
|
||||
extern fn slots_set(slots: Any, key: String, val: String) -> Any
|
||||
extern fn make_slots(k0: String, v0: String) -> Any
|
||||
extern fn make_slots2(k0: String, v0: String, k1: String, v1: String) -> Any
|
||||
extern fn make_slots3(k0: String, v0: String, k1: String, v1: String, k2: String, v2: String) -> Any
|
||||
extern fn make_slots4(k0: String, v0: String, k1: String, v1: String, k2: String, v2: String, k3: String, v3: String) -> Any
|
||||
extern fn make_slots5(k0: String, v0: String, k1: String, v1: String, k2: String, v2: String, k3: String, v3: String, k4: String, v4: String) -> Any
|
||||
extern fn rule_id(rule: Any) -> String
|
||||
extern fn rule_lhs(rule: Any) -> String
|
||||
extern fn rule_rhs_len(rule: Any) -> Int
|
||||
extern fn rule_rhs(rule: Any, idx: Int) -> String
|
||||
extern fn make_rule(id: String, lhs: String, r0: String) -> Any
|
||||
extern fn make_rule2(id: String, lhs: String, r0: String, r1: String) -> Any
|
||||
extern fn make_rule3(id: String, lhs: String, r0: String, r1: String, r2: String) -> Any
|
||||
extern fn make_rule4(id: String, lhs: String, r0: String, r1: String, r2: String, r3: String) -> Any
|
||||
extern fn build_rules() -> Any
|
||||
extern fn get_rules() -> Any
|
||||
extern fn find_rule(rule_id_str: String) -> Any
|
||||
// auto-generated by elc --emit-header — do not edit
|
||||
extern fn slots_get(slots: [String], key: String) -> String
|
||||
extern fn slots_set(slots: [String], key: String, val: String) -> [String]
|
||||
extern fn make_slots(k0: String, v0: String) -> [String]
|
||||
extern fn make_slots2(k0: String, v0: String, k1: String, v1: String) -> [String]
|
||||
extern fn make_slots3(k0: String, v0: String, k1: String, v1: String, k2: String, v2: String) -> [String]
|
||||
extern fn make_slots4(k0: String, v0: String, k1: String, v1: String, k2: String, v2: String, k3: String, v3: String) -> [String]
|
||||
extern fn make_slots5(k0: String, v0: String, k1: String, v1: String, k2: String, v2: String, k3: String, v3: String, k4: String, v4: String) -> [String]
|
||||
extern fn rule_id(rule: [String]) -> String
|
||||
extern fn rule_lhs(rule: [String]) -> String
|
||||
extern fn rule_rhs_len(rule: [String]) -> Int
|
||||
extern fn rule_rhs(rule: [String], idx: Int) -> String
|
||||
extern fn make_rule(id: String, lhs: String, r0: String) -> [String]
|
||||
extern fn make_rule2(id: String, lhs: String, r0: String, r1: String) -> [String]
|
||||
extern fn make_rule3(id: String, lhs: String, r0: String, r1: String, r2: String) -> [String]
|
||||
extern fn make_rule4(id: String, lhs: String, r0: String, r1: String, r2: String, r3: String) -> [String]
|
||||
extern fn build_rules() -> [[String]]
|
||||
extern fn get_rules() -> [[String]]
|
||||
extern fn find_rule(rule_id_str: String) -> [String]
|
||||
extern fn make_leaf(label: String, word: String) -> String
|
||||
extern fn make_node1(label: String, child0: String) -> String
|
||||
extern fn make_node2(label: String, child0: String, child1: String) -> String
|
||||
@@ -24,15 +24,15 @@ extern fn make_node3(label: String, child0: String, child1: String, child2: Stri
|
||||
extern fn make_node4(label: String, child0: String, child1: String, child2: String, child3: String) -> String
|
||||
extern fn nlg_is_ws(c: String) -> Bool
|
||||
extern fn skip_ws(s: String, pos: Int) -> Int
|
||||
extern fn scan_token(s: String, start: Int) -> Any
|
||||
extern fn scan_token(s: String, start: Int) -> [String]
|
||||
extern fn render_tree(tree: String) -> String
|
||||
extern fn gram_word_order(profile: Any) -> String
|
||||
extern fn gram_order_constituents(subj: String, verb: String, obj: String, profile: Any) -> String
|
||||
extern fn gram_build_vp(verb: String, aux: String, profile: Any) -> String
|
||||
extern fn gram_question_strategy(profile: Any) -> String
|
||||
extern fn gram_word_order(profile: [String]) -> String
|
||||
extern fn gram_order_constituents(subj: String, verb: String, obj: String, profile: [String]) -> String
|
||||
extern fn gram_build_vp(verb: String, aux: String, profile: [String]) -> String
|
||||
extern fn gram_question_strategy(profile: [String]) -> String
|
||||
extern fn is_pronoun(word: String) -> Bool
|
||||
extern fn build_np(referent: String, slots: Any) -> String
|
||||
extern fn build_np(referent: String, slots: [String]) -> String
|
||||
extern fn build_pp(loc: String) -> String
|
||||
extern fn build_vp_body(slots: Any) -> String
|
||||
extern fn build_vp_from_slots(slots: Any) -> String
|
||||
extern fn generate_tree(rule_id_str: String, slots: Any) -> String
|
||||
extern fn build_vp_body(slots: [String]) -> String
|
||||
extern fn build_vp_from_slots(slots: [String]) -> String
|
||||
extern fn generate_tree(rule_id_str: String, slots: [String]) -> String
|
||||
|
||||
-5
@@ -392,8 +392,3 @@ el_val_t lang_code(el_val_t profile) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int _argc, char** _argv) {
|
||||
el_runtime_init_args(_argc, _argv);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
+23
-3
@@ -34,7 +34,18 @@ el_val_t tier_canonical(void) {
|
||||
}
|
||||
|
||||
el_val_t mem_store(el_val_t content, el_val_t label, el_val_t tags) {
|
||||
return engram_node_full(content, EL_STR("Memory"), label, el_from_float(0.5), el_from_float(0.5), el_from_float(0.8), EL_STR("Working"), tags);
|
||||
el_val_t id = engram_node_full(content, EL_STR("Memory"), label, el_from_float(0.5), el_from_float(0.5), el_from_float(0.8), EL_STR("Working"), tags);
|
||||
if (str_eq(id, EL_STR(""))) {
|
||||
println(el_str_concat(EL_STR("[memory] write rejected by engram (empty id): label="), label));
|
||||
return EL_STR("");
|
||||
}
|
||||
el_val_t readback = engram_get_node_json(id);
|
||||
if (str_eq(readback, EL_STR("")) || str_eq(readback, EL_STR("{}"))) {
|
||||
println(el_str_concat(el_str_concat(el_str_concat(el_str_concat(EL_STR("[memory] WRITE VERIFY FAILED: label="), label), EL_STR(" id=")), id), EL_STR(" \xe2\x80\x94 node absent after write")));
|
||||
return EL_STR("");
|
||||
}
|
||||
println(el_str_concat(el_str_concat(EL_STR("[memory] write verified: "), id), EL_STR(" ok")));
|
||||
return id;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -136,7 +147,12 @@ el_val_t mem_boot_count_inc(void) {
|
||||
el_val_t tags = EL_STR("[\"soul-meta\",\"boot-counter\"]");
|
||||
el_val_t boot_node_id = engram_node_full(content, EL_STR("Memory"), EL_STR("soul:boot_count"), el_from_float(0.9), el_from_float(0.9), el_from_float(1.0), EL_STR("Canonical"), tags);
|
||||
if (str_eq(boot_node_id, EL_STR(""))) {
|
||||
println(el_str_concat(el_str_concat(EL_STR("[memory] mem_boot_count_inc: engram write failed \xe2\x80\x94 boot counter node lost (count="), int_to_str(next)), EL_STR(")")));
|
||||
println(el_str_concat(el_str_concat(EL_STR("[memory] mem_boot_count_inc: write rejected (empty id) \xe2\x80\x94 boot counter node lost (count="), int_to_str(next)), EL_STR(")")));
|
||||
return next;
|
||||
}
|
||||
el_val_t boot_readback = engram_get_node_json(boot_node_id);
|
||||
if (str_eq(boot_readback, EL_STR("")) || str_eq(boot_readback, EL_STR("{}"))) {
|
||||
println(el_str_concat(el_str_concat(el_str_concat(EL_STR("[memory] mem_boot_count_inc: WRITE VERIFY FAILED id="), boot_node_id), EL_STR(" count=")), int_to_str(next)));
|
||||
}
|
||||
return next;
|
||||
return 0;
|
||||
@@ -149,7 +165,11 @@ el_val_t mem_emit_state_event(el_val_t trigger, el_val_t kind, el_val_t content)
|
||||
el_val_t safe_content = str_replace(content, EL_STR("\""), EL_STR("'"));
|
||||
el_val_t payload = el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(EL_STR("{\"trigger\":\""), safe_trigger), EL_STR("\"")), EL_STR(",\"kind\":\"")), kind), EL_STR("\"")), EL_STR(",\"content\":\"")), safe_content), EL_STR("\"")), EL_STR(",\"boot\":")), int_to_str(boot)), EL_STR(",\"ts\":")), int_to_str(ts)), EL_STR("}"));
|
||||
el_val_t tags = EL_STR("[\"internal-state\",\"pre-reasoning\",\"InternalStateEvent\"]");
|
||||
return engram_node_full(payload, EL_STR("InternalStateEvent"), el_str_concat(EL_STR("state-event:"), kind), el_from_float(0.85), el_from_float(0.8), el_from_float(0.9), EL_STR("Episodic"), tags);
|
||||
el_val_t event_id = engram_node_full(payload, EL_STR("InternalStateEvent"), el_str_concat(EL_STR("state-event:"), kind), el_from_float(0.85), el_from_float(0.8), el_from_float(0.9), EL_STR("Episodic"), tags);
|
||||
if (str_eq(event_id, EL_STR(""))) {
|
||||
println(el_str_concat(EL_STR("[memory] mem_emit_state_event: write rejected (empty id): kind="), kind));
|
||||
}
|
||||
return event_id;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
+1
-1
@@ -180,7 +180,7 @@ el_val_t api_persisted(el_val_t id) {
|
||||
return 0;
|
||||
}
|
||||
el_val_t node = engram_get_node_json(id);
|
||||
return (!str_eq(node, EL_STR("")) && !str_eq(node, EL_STR("null")));
|
||||
return ((!str_eq(node, EL_STR("")) && !str_eq(node, EL_STR("null"))) && !str_eq(node, EL_STR("{}")));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
+2
-7
@@ -193,10 +193,10 @@ el_val_t realize_question_lang(el_val_t predicate, el_val_t tense, el_val_t aspe
|
||||
loc_part = core;
|
||||
}
|
||||
if (str_eq(code, EL_STR("ja"))) {
|
||||
return el_str_concat(loc_part, EL_STR(" か"));
|
||||
return el_str_concat(loc_part, EL_STR(" \xe3\x81\x8b"));
|
||||
}
|
||||
if (str_eq(code, EL_STR("hi"))) {
|
||||
return el_str_concat(loc_part, EL_STR(" क्या"));
|
||||
return el_str_concat(loc_part, EL_STR(" \xe0\xa4\x95\xe0\xa5\x8d\xe0\xa4\xaf\xe0\xa4\xbe"));
|
||||
}
|
||||
if (str_eq(code, EL_STR("fi"))) {
|
||||
return el_str_concat(loc_part, EL_STR("-ko"));
|
||||
@@ -314,8 +314,3 @@ el_val_t realize(el_val_t form) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int _argc, char** _argv) {
|
||||
el_runtime_init_args(_argc, _argv);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
+5
-5
@@ -1,10 +1,10 @@
|
||||
// auto-generated by elc --emit-header - do not edit
|
||||
// auto-generated by elc --emit-header — do not edit
|
||||
extern fn agent_person(agent: String) -> String
|
||||
extern fn agent_number(agent: String) -> String
|
||||
extern fn realize_np(referent: String, number: String) -> String
|
||||
extern fn realize_vp_lang(base_verb: String, tense: String, aspect: String, person: String, number: String, profile: Any) -> Any
|
||||
extern fn realize_question_lang(predicate: String, tense: String, aspect: String, person: String, number: String, agent: String, patient: String, location: String, profile: Any) -> String
|
||||
extern fn realize_vp_lang(base_verb: String, tense: String, aspect: String, person: String, number: String, profile: [String]) -> [String]
|
||||
extern fn realize_question_lang(predicate: String, tense: String, aspect: String, person: String, number: String, agent: String, patient: String, location: String, profile: [String]) -> String
|
||||
extern fn capitalize_first(s: String) -> String
|
||||
extern fn add_punct(s: String, intent: String) -> String
|
||||
extern fn realize_lang(form: Any, profile: Any) -> String
|
||||
extern fn realize(form: Any) -> String
|
||||
extern fn realize_lang(form: [String], profile: [String]) -> String
|
||||
extern fn realize(form: [String]) -> String
|
||||
|
||||
-5
@@ -291,8 +291,3 @@ el_val_t sem_realize_lang(el_val_t frame, el_val_t lang_code) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int _argc, char** _argv) {
|
||||
el_runtime_init_args(_argc, _argv);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
+15
-15
@@ -1,18 +1,18 @@
|
||||
// auto-generated by elc --emit-header - do not edit
|
||||
extern fn sem_frame(intent: String, subject: String, obj: String, modifiers: String) -> Any
|
||||
extern fn sem_frame_lang(intent: String, subject: String, obj: String, modifiers: String, lang_code: String) -> Any
|
||||
extern fn sem_frame_simple(intent: String, subject: String) -> Any
|
||||
extern fn sem_frame_obj(intent: String, subject: String, obj: String) -> Any
|
||||
extern fn sem_intent(frame: Any) -> String
|
||||
extern fn sem_subject(frame: Any) -> String
|
||||
extern fn sem_object(frame: Any) -> String
|
||||
extern fn sem_modifiers(frame: Any) -> String
|
||||
extern fn sem_lang(frame: Any) -> String
|
||||
// auto-generated by elc --emit-header — do not edit
|
||||
extern fn sem_frame(intent: String, subject: String, obj: String, modifiers: String) -> [String]
|
||||
extern fn sem_frame_lang(intent: String, subject: String, obj: String, modifiers: String, lang_code: String) -> [String]
|
||||
extern fn sem_frame_simple(intent: String, subject: String) -> [String]
|
||||
extern fn sem_frame_obj(intent: String, subject: String, obj: String) -> [String]
|
||||
extern fn sem_intent(frame: [String]) -> String
|
||||
extern fn sem_subject(frame: [String]) -> String
|
||||
extern fn sem_object(frame: [String]) -> String
|
||||
extern fn sem_modifiers(frame: [String]) -> String
|
||||
extern fn sem_lang(frame: [String]) -> String
|
||||
extern fn sem_first_modifier(mods: String) -> String
|
||||
extern fn sem_intent_to_realize(intent: String) -> String
|
||||
extern fn sem_to_spec(frame: Any) -> Any
|
||||
extern fn sem_to_spec_full(frame: Any, verb: String, tense: String, aspect: String) -> Any
|
||||
extern fn sem_to_spec(frame: [String]) -> [String]
|
||||
extern fn sem_to_spec_full(frame: [String], verb: String, tense: String, aspect: String) -> [String]
|
||||
extern fn sem_realize_greet(subject: String) -> String
|
||||
extern fn sem_realize(frame: Any) -> String
|
||||
extern fn sem_realize_full(frame: Any, verb: String, tense: String, aspect: String) -> String
|
||||
extern fn sem_realize_lang(frame: Any, lang_code: String) -> String
|
||||
extern fn sem_realize(frame: [String]) -> String
|
||||
extern fn sem_realize_full(frame: [String], verb: String, tense: String, aspect: String) -> String
|
||||
extern fn sem_realize_lang(frame: [String], lang_code: String) -> String
|
||||
|
||||
+140
-2
@@ -25258,7 +25258,18 @@ el_val_t tier_canonical(void) {
|
||||
}
|
||||
|
||||
el_val_t mem_store(el_val_t content, el_val_t label, el_val_t tags) {
|
||||
return engram_node_full(content, EL_STR("Memory"), label, el_from_float(el_from_float(0.5)), el_from_float(el_from_float(0.5)), el_from_float(el_from_float(0.8)), EL_STR("Working"), tags);
|
||||
el_val_t id = engram_node_full(content, EL_STR("Memory"), label, el_from_float(el_from_float(0.5)), el_from_float(el_from_float(0.5)), el_from_float(el_from_float(0.8)), EL_STR("Working"), tags);
|
||||
if (str_eq(id, EL_STR(""))) {
|
||||
println(el_str_concat(EL_STR("[memory] write rejected by engram (empty id): label="), label));
|
||||
return EL_STR("");
|
||||
}
|
||||
el_val_t readback = engram_get_node_json(id);
|
||||
if (str_eq(readback, EL_STR("")) || str_eq(readback, EL_STR("{}"))) {
|
||||
println(el_str_concat(el_str_concat(el_str_concat(el_str_concat(EL_STR("[memory] WRITE VERIFY FAILED: label="), label), EL_STR(" id=")), id), EL_STR(" \xe2\x80\x94 node absent after write")));
|
||||
return EL_STR("");
|
||||
}
|
||||
println(el_str_concat(el_str_concat(EL_STR("[memory] write verified: "), id), EL_STR(" ok")));
|
||||
return id;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -27028,6 +27039,27 @@ el_val_t next_bridge_id(void) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* === P2.10: Convert Anthropic tools format to OpenAI function-calling format === */
|
||||
/*
 * Convert an Anthropic-format tools array (JSON text) into the OpenAI
 * function-calling "tools" array (JSON text).
 *
 * tools_json: JSON array text. For each element the fields "name",
 *             "description" and "input_schema" are read.
 * Returns:    JSON array text whose elements have the shape
 *             {"type":"function","function":{"name":..,"description":..,"parameters":..}},
 *             or "[]" when json_array_len reports zero (or fewer) elements.
 */
el_val_t anthropic_tools_to_openai(el_val_t tools_json) {
    el_val_t len = json_array_len(tools_json);
    if (len <= 0) { return EL_STR("[]"); }

    el_val_t result = EL_STR("[");
    el_val_t i = 0;
    while (i < len) {
        el_val_t tool = json_array_get(tools_json, i);

        /* Both name and description are spliced between literal quotes below,
         * so both go through json_safe; previously only the description did,
         * which let a quote or backslash in a tool name corrupt the payload. */
        el_val_t tname = json_safe(json_get(tool, EL_STR("name")));
        el_val_t tdesc = json_safe(json_get(tool, EL_STR("description")));

        /* input_schema is inserted as raw JSON (not quoted). Fall back to an
         * empty object schema when it is missing. "null" is treated as missing
         * too, matching how other json_get_raw results are checked in this
         * file, so we never emit "parameters":null. */
        el_val_t tschema = json_get_raw(tool, EL_STR("input_schema"));
        if (str_eq(tschema, EL_STR("")) || str_eq(tschema, EL_STR("null"))) {
            tschema = EL_STR("{\"type\":\"object\",\"properties\":{}}");
        }

        el_val_t oai_tool = el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(EL_STR("{\"type\":\"function\",\"function\":{\"name\":\""), tname), EL_STR("\",\"description\":\"")), tdesc), EL_STR("\",\"parameters\":")), tschema), EL_STR("}}"));

        /* Comma-separate every element after the first. */
        if (i > 0) { result = el_str_concat(result, EL_STR(",")); }
        result = el_str_concat(result, oai_tool);
        i = (i + 1);
    }

    return el_str_concat(result, EL_STR("]"));
    return 0; /* unreachable; kept to match the trailing return used by the other functions in this file */
}
|
||||
|
||||
el_val_t agentic_loop(el_val_t session_id, el_val_t model, el_val_t safe_sys, el_val_t tools_json, el_val_t messages_in, el_val_t h, el_val_t tools_log_in) {
|
||||
el_val_t api_url = EL_STR("https://api.anthropic.com/v1/messages");
|
||||
el_val_t messages = messages_in;
|
||||
@@ -27039,6 +27071,87 @@ el_val_t agentic_loop(el_val_t session_id, el_val_t model, el_val_t safe_sys, el
|
||||
el_val_t pend_tool_id = EL_STR("");
|
||||
el_val_t pend_tool_name = EL_STR("");
|
||||
el_val_t pend_tool_input = EL_STR("");
|
||||
/* === P2.10: OLLAMA/OPENAI-COMPAT PROVIDER BRANCH === */
|
||||
{
|
||||
el_val_t _ol_prov = env(EL_STR("SOUL_LLM_PROVIDER"));
|
||||
if (str_eq(_ol_prov, EL_STR("ollama"))) {
|
||||
el_val_t _ol_model = env(EL_STR("SOUL_LLM_MODEL"));
|
||||
if (str_eq(_ol_model, EL_STR(""))) { _ol_model = env(EL_STR("OLLAMA_MODEL")); }
|
||||
if (str_eq(_ol_model, EL_STR(""))) { _ol_model = EL_STR("llama3.1"); }
|
||||
el_val_t _ol_base = env(EL_STR("OLLAMA_API_BASE"));
|
||||
if (str_eq(_ol_base, EL_STR(""))) { _ol_base = EL_STR("http://localhost:11434"); }
|
||||
el_val_t _ol_url = el_str_concat(_ol_base, EL_STR("/v1/chat/completions"));
|
||||
println(el_str_concat(el_str_concat(el_str_concat(EL_STR("[soul] provider: ollama @ "), _ol_base), EL_STR(" (model: ")), el_str_concat(_ol_model, EL_STR(")"))));
|
||||
el_val_t _ol_oai_tools = anthropic_tools_to_openai(tools_json);
|
||||
/* Build initial OpenAI-format messages: prepend system message to existing turns */
|
||||
el_val_t _ol_sys_msg = el_str_concat(el_str_concat(EL_STR("{\"role\":\"system\",\"content\":\""), safe_sys), EL_STR("\"}"));
|
||||
el_val_t _ol_msgs_inner = str_slice(messages_in, 1, (str_len(messages_in) - 1));
|
||||
el_val_t _ol_msgs = el_str_concat(el_str_concat(el_str_concat(el_str_concat(EL_STR("["), _ol_sys_msg), EL_STR(",")), _ol_msgs_inner), EL_STR("]"));
|
||||
el_val_t _ol_h = el_map_new(0);
|
||||
map_set(_ol_h, EL_STR("content-type"), EL_STR("application/json"));
|
||||
el_val_t _ol_keep = 1;
|
||||
el_val_t _ol_iter = 0;
|
||||
el_val_t _ol_final = EL_STR("");
|
||||
while (_ol_keep && (_ol_iter < 8)) {
|
||||
el_val_t _ol_req = el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(EL_STR("{\"model\":\""), _ol_model), EL_STR("\",\"messages\":")), _ol_msgs), EL_STR(",\"stream\":false,\"tools\":")), _ol_oai_tools), EL_STR("}"));
|
||||
el_val_t _ol_resp = http_post_with_headers(_ol_url, _ol_req, _ol_h);
|
||||
if (str_eq(_ol_resp, EL_STR("")) || str_starts_with(_ol_resp, EL_STR("{\"error\""))) {
|
||||
return EL_STR("{\"error\":\"llm unavailable\",\"reply\":\"\"}");
|
||||
}
|
||||
el_val_t _ol_choices = json_get_raw(_ol_resp, EL_STR("choices"));
|
||||
if (str_eq(_ol_choices, EL_STR("")) || str_eq(_ol_choices, EL_STR("null"))) {
|
||||
return EL_STR("{\"error\":\"no choices in response\",\"reply\":\"\"}");
|
||||
}
|
||||
el_val_t _ol_c0 = json_array_get(_ol_choices, 0);
|
||||
el_val_t _ol_c0_msg = json_get_raw(_ol_c0, EL_STR("message"));
|
||||
el_val_t _ol_content = json_get(_ol_c0_msg, EL_STR("content"));
|
||||
el_val_t _ol_tcs = json_get_raw(_ol_c0_msg, EL_STR("tool_calls"));
|
||||
el_val_t _ol_has_tc = (!str_eq(_ol_tcs, EL_STR("")) && !str_eq(_ol_tcs, EL_STR("null")));
|
||||
el_val_t _ol_text = EL_STR("");
|
||||
if (!str_eq(_ol_content, EL_STR("")) && !str_eq(_ol_content, EL_STR("null"))) { _ol_text = _ol_content; }
|
||||
el_val_t _ol_tname = EL_STR("");
|
||||
el_val_t _ol_tid = EL_STR("");
|
||||
el_val_t _ol_tinput = EL_STR("");
|
||||
if (_ol_has_tc) {
|
||||
el_val_t _ol_tc0 = json_array_get(_ol_tcs, 0);
|
||||
_ol_tid = json_get(_ol_tc0, EL_STR("id"));
|
||||
el_val_t _ol_fn = json_get_raw(_ol_tc0, EL_STR("function"));
|
||||
_ol_tname = json_get(_ol_fn, EL_STR("name"));
|
||||
_ol_tinput = json_get(_ol_fn, EL_STR("arguments"));
|
||||
}
|
||||
el_val_t _ol_is_tool = (_ol_has_tc && !str_eq(_ol_tname, EL_STR("")));
|
||||
el_val_t _ol_result_raw = EL_STR("");
|
||||
if (_ol_is_tool) { _ol_result_raw = dispatch_tool(_ol_tname, _ol_tinput); }
|
||||
el_val_t _ol_result = _ol_result_raw;
|
||||
if (str_len(_ol_result_raw) > 6000) { _ol_result = el_str_concat(str_slice(_ol_result_raw, 0, 6000), EL_STR("...[truncated]")); }
|
||||
if (_ol_has_tc) {
|
||||
el_val_t _ol_tq = el_str_concat(el_str_concat(EL_STR("\""), _ol_tname), EL_STR("\""));
|
||||
if (str_eq(tools_log, EL_STR(""))) { tools_log = _ol_tq; } else { tools_log = el_str_concat(el_str_concat(tools_log, EL_STR(",")), _ol_tq); }
|
||||
}
|
||||
/* arguments must be re-serialized as JSON string for OpenAI assistant message */
|
||||
el_val_t _ol_tinput_escaped = el_str_concat(el_str_concat(EL_STR("\""), json_safe(_ol_tinput)), EL_STR("\""));
|
||||
if (_ol_is_tool) {
|
||||
/* Append assistant tool_call message and tool result to messages */
|
||||
el_val_t _ol_asst_tc = el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(EL_STR("{\"role\":\"assistant\",\"content\":null,\"tool_calls\":[{\"id\":\""), _ol_tid), EL_STR("\",\"type\":\"function\",\"function\":{\"name\":\"")), _ol_tname), EL_STR("\",\"arguments\":")), _ol_tinput_escaped), EL_STR("}}]}"));
|
||||
el_val_t _ol_tool_msg = el_str_concat(el_str_concat(el_str_concat(el_str_concat(EL_STR("{\"role\":\"tool\",\"tool_call_id\":\""), _ol_tid), EL_STR("\",\"content\":\"")), json_safe(_ol_result)), EL_STR("\"}"));
|
||||
el_val_t _ol_cur_inner = str_slice(_ol_msgs, 1, (str_len(_ol_msgs) - 1));
|
||||
_ol_msgs = el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(EL_STR("["), _ol_cur_inner), EL_STR(",")), _ol_asst_tc), EL_STR(",")), _ol_tool_msg), EL_STR("]"));
|
||||
} else {
|
||||
_ol_final = _ol_text;
|
||||
_ol_keep = 0;
|
||||
}
|
||||
_ol_iter = (_ol_iter + 1);
|
||||
}
|
||||
if (str_eq(_ol_final, EL_STR(""))) {
|
||||
return EL_STR("{\"error\":\"no response\",\"reply\":\"\"}");
|
||||
}
|
||||
el_val_t _ol_safe_final = json_safe(_ol_final);
|
||||
el_val_t _ol_tools_arr = EL_STR("[]");
|
||||
if (!str_eq(tools_log, EL_STR(""))) { _ol_tools_arr = el_str_concat(el_str_concat(EL_STR("["), tools_log), EL_STR("]")); }
|
||||
return el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(EL_STR("{\"reply\":\""), _ol_safe_final), EL_STR("\",\"model\":\"")), _ol_model), EL_STR("\",\"agentic\":true,\"tools_used\":")), _ol_tools_arr), EL_STR("}"));
|
||||
}
|
||||
}
|
||||
/* === END OLLAMA BRANCH === */
|
||||
while (keep_going && (iteration < 8)) {
|
||||
el_val_t req_body = el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(EL_STR("{\"model\":\""), model), EL_STR("\"")), EL_STR(",\"max_tokens\":4096")), EL_STR(",\"system\":\"")), safe_sys), EL_STR("\"")), EL_STR(",\"tools\":")), tools_json), EL_STR(",\"messages\":")), messages), EL_STR("}"));
|
||||
el_val_t raw_resp = http_post_with_headers(api_url, req_body, h);
|
||||
@@ -27236,7 +27349,16 @@ el_val_t handle_dharma_room_turn(el_val_t body) {
|
||||
}
|
||||
el_val_t clean_response = clean_llm_response(raw_response);
|
||||
el_val_t snap_path = state_get(EL_STR("soul_snapshot_path"));
|
||||
el_val_t discard_id = engram_node(clean_response, EL_STR("episodic"), el_from_float(el_from_float(0.6)));
|
||||
el_val_t utterance_tags = EL_STR("[\"soul-utterance\",\"episodic\"]");
|
||||
el_val_t discard_id = engram_node_full(clean_response, EL_STR("Conversation"), EL_STR("soul:utterance"), el_from_float(el_from_float(0.6)), el_from_float(el_from_float(0.6)), el_from_float(el_from_float(0.8)), EL_STR("Episodic"), utterance_tags);
|
||||
if (!str_eq(discard_id, EL_STR(""))) {
|
||||
el_val_t utterance_verify = engram_get_node_json(discard_id);
|
||||
if (str_eq(utterance_verify, EL_STR("")) || str_eq(utterance_verify, EL_STR("{}"))) {
|
||||
println(el_str_concat(el_str_concat(EL_STR("[memory] WRITE VERIFY FAILED: soul:utterance id="), discard_id), EL_STR(" \xe2\x80\x94 node absent after write")));
|
||||
} else {
|
||||
println(el_str_concat(el_str_concat(EL_STR("[memory] write verified: "), discard_id), EL_STR(" ok")));
|
||||
}
|
||||
}
|
||||
if (!str_eq(snap_path, EL_STR(""))) {
|
||||
el_val_t discard_save = engram_save(snap_path);
|
||||
}
|
||||
@@ -27750,6 +27872,14 @@ el_val_t handle_api_remember(el_val_t body) {
|
||||
el_val_t base_tags = ({ el_val_t _if_result_308 = 0; if (str_eq(tags_raw, EL_STR(""))) { _if_result_308 = (EL_STR("[\"Memory\"]")); } else { _if_result_308 = (tags_raw); } _if_result_308; });
|
||||
el_val_t final_tags = ({ el_val_t _if_result_309 = 0; if (str_eq(project, EL_STR(""))) { _if_result_309 = (base_tags); } else { el_val_t inner = str_slice(base_tags, 1, (str_len(base_tags) - 1)); _if_result_309 = (el_str_concat(el_str_concat(el_str_concat(el_str_concat(EL_STR("["), inner), EL_STR(",\"project:")), project), EL_STR("\"]"))); } _if_result_309; });
|
||||
el_val_t id = engram_node_full(content, EL_STR("Memory"), EL_STR("memory:remembered"), el_from_float(sal), el_from_float(sal), el_from_float(el_from_float(0.9)), EL_STR("Episodic"), final_tags);
|
||||
if (str_eq(id, EL_STR(""))) {
|
||||
return EL_STR("{\"ok\":false,\"error\":\"write_not_persisted\",\"id\":\"\"}");
|
||||
}
|
||||
el_val_t remember_readback = engram_get_node_json(id);
|
||||
if (str_eq(remember_readback, EL_STR("")) || str_eq(remember_readback, EL_STR("{}"))) {
|
||||
println(el_str_concat(el_str_concat(EL_STR("[neuron-api] WRITE VERIFY FAILED remember id="), id), EL_STR(" \xe2\x80\x94 node absent after write")));
|
||||
return el_str_concat(el_str_concat(EL_STR("{\"ok\":false,\"error\":\"write_not_persisted\",\"id\":\""), id), EL_STR("\"}"));
|
||||
}
|
||||
return el_str_concat(el_str_concat(EL_STR("{\"id\":\""), id), EL_STR("\",\"ok\":true}"));
|
||||
return 0;
|
||||
}
|
||||
@@ -27804,6 +27934,14 @@ el_val_t handle_api_capture_knowledge(el_val_t body) {
|
||||
el_val_t full = ({ el_val_t _if_result_317 = 0; if (str_eq(title, EL_STR(""))) { _if_result_317 = (content); } else { _if_result_317 = (el_str_concat(el_str_concat(title, EL_STR(": ")), content)); } _if_result_317; });
|
||||
el_val_t tags = EL_STR("[\"Knowledge\",\"captured\"]");
|
||||
el_val_t id = engram_node_full(full, EL_STR("Knowledge"), EL_STR("knowledge:captured"), el_from_float(el_from_float(0.85)), el_from_float(el_from_float(0.8)), el_from_float(el_from_float(0.9)), EL_STR("Episodic"), tags);
|
||||
if (str_eq(id, EL_STR(""))) {
|
||||
return EL_STR("{\"ok\":false,\"error\":\"write_not_persisted\",\"id\":\"\"}");
|
||||
}
|
||||
el_val_t captured_readback = engram_get_node_json(id);
|
||||
if (str_eq(captured_readback, EL_STR("")) || str_eq(captured_readback, EL_STR("{}"))) {
|
||||
println(el_str_concat(el_str_concat(EL_STR("[neuron-api] WRITE VERIFY FAILED capture id="), id), EL_STR(" \xe2\x80\x94 node absent after write")));
|
||||
return el_str_concat(el_str_concat(EL_STR("{\"ok\":false,\"error\":\"write_not_persisted\",\"id\":\""), id), EL_STR("\"}"));
|
||||
}
|
||||
return el_str_concat(el_str_concat(EL_STR("{\"id\":\""), id), EL_STR("\",\"ok\":true}"));
|
||||
return 0;
|
||||
}
|
||||
|
||||
-5
@@ -334,8 +334,3 @@ el_val_t entry_form(el_val_t entry, el_val_t n) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int _argc, char** _argv) {
|
||||
el_runtime_init_args(_argc, _argv);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,110 @@
|
||||
# GLM-OCR Spike — 2026-06-27
|
||||
|
||||
## Verdict: SHIP IT
|
||||
|
||||
MLX-native path confirmed. Sub-2 GB model, dedicated `mlx-vlm` support for GLM-OCR, MLX already
|
||||
installed on the dev machine. No blockers.
|
||||
|
||||
---
|
||||
|
||||
## Model
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Name** | GLM-OCR |
|
||||
| **HuggingFace path** | `zai-org/GLM-OCR` (base BF16) |
|
||||
| **MLX path** | `mlx-community/GLM-OCR-8bit` |
|
||||
| **Parameters** | 0.9B |
|
||||
| **Disk (MLX 8-bit)** | 1.59 GB (`model.safetensors` 1.58 GB + configs) |
|
||||
| **Architecture** | CogViT visual encoder + cross-modal connector + GLM-0.5B decoder |
|
||||
| **License** | MIT (model); Apache 2.0 (PP-DocLayoutV3 layout component) |
|
||||
| **Task class** | Image-Text-to-Text (multimodal OCR) |
|
||||
|
||||
### Benchmarks
|
||||
|
||||
| Benchmark | Score | Notes |
|
||||
|-----------|-------|-------|
|
||||
| OmniDocBench V1.5 | **94.62** | Ranked #1 at evaluation date |
|
||||
| olmOCR-bench (overall) | 75.2 | — |
|
||||
| Throughput (base, GPU) | 0.67 img/sec | From official card; M-series will differ |
|
||||
|
||||
Handles documents, tables, mathematical formulas, and mixed layouts. Not just raw text extraction —
|
||||
returns structured markdown output.
|
||||
|
||||
---
|
||||
|
||||
## Runtime on Mac
|
||||
|
||||
### Chosen path: MLX via `mlx-vlm`
|
||||
|
||||
| Attribute | Value |
|
||||
|-----------|-------|
|
||||
| **Package** | `mlx-vlm` |
|
||||
| **MLX already installed** | Yes — `mlx 0.31.2`, `mlx-lm 0.31.3`, `mlx-metal 0.31.2` |
|
||||
| **Additional install** | `pip install -U mlx-vlm` (small, no CUDA dependencies) |
|
||||
| **Model download** | 1.59 GB on first run (auto-cached in `~/.cache/huggingface/`) |
|
||||
| **Memory requirement** | ~2–3 GB unified memory (1.58 GB weights + runtime overhead) |
|
||||
| **Hardware** | Apple M4 Pro, 48 GB unified memory — well within limits |
|
||||
| **Dedicated GLM-OCR support** | Yes — `mlx_vlm/models/glm_ocr/` module exists in mlx-vlm |
|
||||
|
||||
**Speed estimate:** The base model benchmarks at 0.67 img/sec (≈1.5 sec/image) on GPU. On M4 Pro via MLX (Metal),
|
||||
expect 0.3–0.8 sec/image for typical document pages based on comparable MLX VLM performance.
|
||||
Exact figures require a timed run with the prototype.
|
||||
|
||||
### Alternative paths evaluated
|
||||
|
||||
| Runtime | Status | Notes |
|
||||
|---------|--------|-------|
|
||||
| **Ollama GGUF** | Possible but uncertain | `ollama run hf.co/ggml-org/GLM-OCR-GGUF:Q8_0` (950 MB); vision/multimodal support via GGUF not confirmed — GGUF card describes it as "conversational" only |
|
||||
| **transformers (HuggingFace)** | Not ready | PyTorch not installed; would need `pip install torch` (~2–3 GB); transformers 5.6.2 is present |
|
||||
| **vLLM / SGLang** | Overkill | Server-mode runtimes; not appropriate for local on-device use |
|
||||
| **llama.cpp** | Not installed | Could work with Q8_0 GGUF (950 MB) but vision support uncertain |
|
||||
|
||||
MLX wins: smallest install delta, Apple-native, and dedicated model support (end-to-end validation on this machine is still pending — see Recommendation).
|
||||
|
||||
---
|
||||
|
||||
## Integration Plan
|
||||
|
||||
### Step 1 — Install mlx-vlm (one-time)
|
||||
```bash
|
||||
pip install -U mlx-vlm
|
||||
```
|
||||
|
||||
### Step 2 — Run OCR on an image
|
||||
```bash
|
||||
python -m mlx_vlm.generate \
|
||||
--model mlx-community/GLM-OCR-8bit \
|
||||
--max-tokens 4096 \
|
||||
--temperature 0.0 \
|
||||
--prompt "Extract all text from this document. Preserve structure including tables and headers." \
|
||||
--image /path/to/document.jpg
|
||||
```
|
||||
|
||||
Model auto-downloads (~1.59 GB) on first run and caches in `~/.cache/huggingface/`.
|
||||
|
||||
### Step 3 — Post to Neuron soul
|
||||
```bash
|
||||
curl -s -X POST http://localhost:7770/api/neuron/memory \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"content\":\"<OCR_TEXT>\",\"label\":\"Photo: filename.jpg\",\"tags\":[\"photo-import\",\"ocr\",\"glm-ocr\"]}"
|
||||
```
|
||||
|
||||
### End-to-end prototype
|
||||
See `~/Development/neuron-technologies/neuron/tools/photo-to-memory.sh` — working stub.
|
||||
|
||||
### Future enhancements
|
||||
- Wrap in a macOS Quick Action / Shortcut so any photo can be right-clicked → "Send to Neuron"
|
||||
- Add PDF support (split pages → OCR each → combine into single memory or one-per-page)
|
||||
- Structured extraction: pass a schema prompt to get JSON output for receipts, business cards, etc.
|
||||
- Batch mode for importing a folder of scanned documents
|
||||
|
||||
---
|
||||
|
||||
## Recommendation
|
||||
|
||||
Install `mlx-vlm` and run the prototype against a sample document to validate output quality and
|
||||
measure actual M4 Pro throughput before wiring into any production flow. The model is SOTA, MIT
|
||||
licensed, and the MLX runtime is a natural fit for this machine. There is no reason not to proceed.
|
||||
|
||||
The photo-to-memory.sh prototype is ready to test immediately after `pip install -U mlx-vlm`.
|
||||
@@ -3,7 +3,7 @@ fn tier_episodic() -> String { return "Episodic" }
|
||||
fn tier_canonical() -> String { return "Canonical" }
|
||||
|
||||
fn mem_store(content: String, label: String, tags: String) -> String {
|
||||
return engram_node_full(
|
||||
let id: String = engram_node_full(
|
||||
content,
|
||||
"Memory",
|
||||
label,
|
||||
@@ -13,6 +13,18 @@ fn mem_store(content: String, label: String, tags: String) -> String {
|
||||
"Working",
|
||||
tags
|
||||
)
|
||||
if str_eq(id, "") {
|
||||
println("[memory] write rejected by engram (empty id): label=" + label)
|
||||
return ""
|
||||
}
|
||||
// Read back to verify the node actually persisted — guards against silent write failures.
|
||||
let readback: String = engram_get_node_json(id)
|
||||
if str_eq(readback, "") || str_eq(readback, "{}") {
|
||||
println("[memory] WRITE VERIFY FAILED: label=" + label + " id=" + id + " — node absent after write")
|
||||
return ""
|
||||
}
|
||||
println("[memory] write verified: " + id + " ok")
|
||||
return id
|
||||
}
|
||||
|
||||
fn mem_remember(content: String, tags: String) -> String {
|
||||
@@ -136,7 +148,12 @@ fn mem_boot_count_inc() -> Int {
|
||||
"Canonical", tags
|
||||
)
|
||||
if str_eq(boot_node_id, "") {
|
||||
println("[memory] mem_boot_count_inc: engram write failed — boot counter node lost (count=" + int_to_str(next) + ")")
|
||||
println("[memory] mem_boot_count_inc: write rejected (empty id) — boot counter node lost (count=" + int_to_str(next) + ")")
|
||||
return next
|
||||
}
|
||||
let boot_readback: String = engram_get_node_json(boot_node_id)
|
||||
if str_eq(boot_readback, "") || str_eq(boot_readback, "{}") {
|
||||
println("[memory] mem_boot_count_inc: WRITE VERIFY FAILED id=" + boot_node_id + " count=" + int_to_str(next))
|
||||
}
|
||||
return next
|
||||
}
|
||||
@@ -155,9 +172,13 @@ fn mem_emit_state_event(trigger: String, kind: String, content: String) -> Strin
|
||||
+ ",\"boot\":" + int_to_str(boot)
|
||||
+ ",\"ts\":" + int_to_str(ts) + "}"
|
||||
let tags: String = "[\"internal-state\",\"pre-reasoning\",\"InternalStateEvent\"]"
|
||||
return engram_node_full(
|
||||
let event_id: String = engram_node_full(
|
||||
payload, "InternalStateEvent", "state-event:" + kind,
|
||||
el_from_float(0.85), el_from_float(0.8), el_from_float(0.9),
|
||||
"Episodic", tags
|
||||
)
|
||||
if str_eq(event_id, "") {
|
||||
println("[memory] mem_emit_state_event: write rejected (empty id): kind=" + kind)
|
||||
}
|
||||
return event_id
|
||||
}
|
||||
|
||||
+3
-1
@@ -94,7 +94,9 @@ fn api_or_empty(s: String) -> String {
|
||||
// api_persisted — true only when `id` is non-empty and the node reads back from engram.
fn api_persisted(id: String) -> Bool {
    if str_eq(id, "") { return false }
    let node: String = engram_get_node_json(id)
    // engram_get_node_json returns "{}" (empty object) when node is not found — not "" or "null".
    // Check all three to guard against any runtime variation.
    // (The earlier two-way check returned before this one could run, so "{}" was
    // reported as persisted; it has been removed.)
    return !str_eq(node, "") && !str_eq(node, "null") && !str_eq(node, "{}")
}
|
||||
|
||||
// api_not_persisted — standard error for a write that did not read back.
|
||||
|
||||
Executable
+221
@@ -0,0 +1,221 @@
|
||||
#!/usr/bin/env bash
|
||||
# cultivation-digest.sh — Neuron daily cultivation digest
|
||||
# Reads ~/.neuron/engram/snapshot.json and produces a sharpness report.
|
||||
# Writes to ~/.neuron/digests/YYYY-MM-DD.txt and appends to sharpness.json.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SNAPSHOT="$HOME/.neuron/engram/snapshot.json"
|
||||
DIGESTS_DIR="$HOME/.neuron/digests"
|
||||
DATE=$(date +%Y-%m-%d)
|
||||
DIGEST_FILE="$DIGESTS_DIR/$DATE.txt"
|
||||
SHARPNESS_FILE="$DIGESTS_DIR/sharpness.json"
|
||||
|
||||
mkdir -p "$DIGESTS_DIR"
|
||||
|
||||
if [[ ! -f "$SNAPSHOT" ]]; then
|
||||
echo "ERROR: snapshot not found at $SNAPSHOT" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Cutoff: now minus 24 hours in milliseconds
|
||||
NOW_MS=$(( $(date +%s) * 1000 ))
|
||||
CUTOFF_MS=$(( NOW_MS - 86400000 ))
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Compute all metrics via a single jq pass (avoids re-reading 174 MB 10x)
|
||||
# Fields in item lines are tab-separated: type TAB importance TAB content
|
||||
# ---------------------------------------------------------------------------
|
||||
# Single jq pass over the snapshot. Emits KEY=value scalar lines, then a
# "---NEW---" section (type TAB importance TAB content) and a "---REINFORCED---"
# section (label TAB activation info).
#
# jq's stderr is left visible and a failure is reported explicitly: with
# `set -e` a suppressed jq error would otherwise end the script with no output.
# Missing/null fields are defaulted so one malformed node cannot abort the digest.
METRICS=$(jq -r --argjson cutoff "$CUTOFF_MS" '
  .nodes as $all |

  # Real memory nodes — exclude InternalStateEvent and corrupted entries.
  # A missing or non-string node_type is treated as corrupted.
  ($all | map(select(
    .node_type != "InternalStateEvent" and
    ((.node_type // "") | tostring | test("^[A-Za-z]+$"))
  ))) as $real |

  # Created within the last 24 hours
  ($real | map(select(.created_at > $cutoff))) as $new |

  # Activated within the last 24 hours but created earlier (reinforced)
  ($real | map(select(
    (.last_activated // 0) > $cutoff and
    .created_at <= $cutoff
  ))) as $reinforced |

  # Stats for sharpness (across all real nodes)
  ($real | length) as $real_count |
  ($real | if length > 0 then (map(.importance // 0) | add / length) else 0 end) as $avg_imp |
  ($real | if length > 0 then (map(.confidence // 1) | add / length) else 0 end) as $avg_conf |

  # activation_ratio: reinforced nodes / total real nodes, capped 0-1
  (($reinforced | length) as $ra |
    if $real_count > 0 then ($ra / $real_count | if . > 1 then 1 else . end) else 0 end
  ) as $act_ratio |

  # Sharpness score 0-100
  ((($avg_imp * 0.4) + ($avg_conf * 0.3) + ($act_ratio * 0.3)) * 100 | round) as $sharpness |

  # Top new memories (by importance desc, cap 10)
  ($new | sort_by(-(.importance // 0)) | .[0:10]) as $top_new |

  # Top reinforced (by last_activated desc, cap 10)
  ($reinforced | sort_by(-.last_activated) | .[0:10]) as $top_reinforced |

  # High-importance nodes (importance > 0.8), across all real nodes
  ($real | map(select(.importance > 0.8)) | length) as $high_imp_count |

  # Scalar metrics
  "TOTAL_REAL=\($real_count)",
  "NEW_COUNT=\($new | length)",
  "REINFORCED_COUNT=\($reinforced | length)",
  "TOTAL_NODES=\($all | length)",
  "AVG_IMP=\($avg_imp)",
  "AVG_CONF=\($avg_conf)",
  "ACT_RATIO=\($act_ratio)",
  "SHARPNESS=\($sharpness)",
  "HIGH_IMP=\($high_imp_count)",

  # Item sections — fields separated by tab character (\t). Tabs, CRs and
  # newlines inside the text are flattened to spaces so each item stays on one
  # line with the expected number of fields.
  "---NEW---",
  ($top_new[] | [
      .node_type,
      ((.importance // 0) | tostring),
      ((.content // "") | tostring | .[0:120] | gsub("[\t\r\n]";" "))
    ] | join("\t")),
  "---REINFORCED---",
  ($top_reinforced[] | [
      ((.label // "(no label)") | tostring | .[0:80] | gsub("[\t\r\n]";" ")),
      ("activated \(.activation_count)x total")
    ] | join("\t"))
' "$SNAPSHOT") || {
  echo "ERROR: jq failed to compute metrics from $SNAPSHOT" >&2
  exit 1
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Parse scalar metrics
|
||||
# ---------------------------------------------------------------------------
|
||||
# parse KEY — print the value of the first "KEY=value" line found in $METRICS.
# Everything after the first "=" is the value, so values may contain "=".
# Prints nothing and still returns 0 when KEY is absent: the previous
# grep|head pipeline returned non-zero in that case, which under
# `set -euo pipefail` ended the script without any message.
parse() {
  printf '%s\n' "$METRICS" | awk -v key="$1" '
    !seen && index($0, key "=") == 1 { print substr($0, length(key) + 2); seen = 1 }
  '
}
|
||||
|
||||
TOTAL_REAL=$(parse TOTAL_REAL)
|
||||
NEW_COUNT=$(parse NEW_COUNT)
|
||||
REINFORCED_COUNT=$(parse REINFORCED_COUNT)
|
||||
TOTAL_NODES=$(parse TOTAL_NODES)
|
||||
AVG_IMP=$(parse AVG_IMP)
|
||||
AVG_CONF=$(parse AVG_CONF)
|
||||
ACT_RATIO=$(parse ACT_RATIO)
|
||||
SHARPNESS=$(parse SHARPNESS)
|
||||
HIGH_IMP=$(parse HIGH_IMP)
|
||||
|
||||
# Format floats to 2dp (use awk, avoiding bc locale issues)
|
||||
# fmt2 VALUE / fmt4 VALUE — print VALUE rounded to 2 / 4 decimal places.
# The value is passed with -v instead of being spliced into the awk program,
# so an empty or non-numeric value prints as zero rather than causing an awk
# syntax error (or being executed as awk code).
fmt2() { awk -v v="${1:-0}" 'BEGIN { printf "%.2f", v }'; }
fmt4() { awk -v v="${1:-0}" 'BEGIN { printf "%.4f", v }'; }
|
||||
AVG_IMP_FMT=$(fmt2 "$AVG_IMP")
|
||||
AVG_CONF_FMT=$(fmt2 "$AVG_CONF")
|
||||
ACT_RATIO_FMT=$(fmt4 "$ACT_RATIO")
|
||||
IMP_CONTRIB=$(fmt4 "$(awk "BEGIN{printf \"%.6f\", $AVG_IMP * 0.4}")")
|
||||
CONF_CONTRIB=$(fmt4 "$(awk "BEGIN{printf \"%.6f\", $AVG_CONF * 0.3}")")
|
||||
ACT_CONTRIB=$(fmt4 "$(awk "BEGIN{printf \"%.6f\", $ACT_RATIO * 0.3}")")
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sharpness delta (compare to yesterday)
|
||||
# ---------------------------------------------------------------------------
|
||||
# Compare today's sharpness with yesterday's logged value (if any) and build
# the human-readable suffix shown next to the score. DELTA_STR stays empty when
# there is no usable previous value.
DELTA_STR=""
if [[ -f "$SHARPNESS_FILE" ]]; then
  # BSD date (macOS) syntax first, then GNU date; empty if neither works.
  YESTERDAY=$(date -v-1d +%Y-%m-%d 2>/dev/null || date -d "yesterday" +%Y-%m-%d 2>/dev/null || echo "")
  if [[ -n "$YESTERDAY" ]]; then
    # An unreadable log must not abort the digest (pipefail would otherwise
    # propagate jq's failure through `set -e`); treat it as "no previous value".
    PREV_SHARPNESS=$(jq -r --arg d "$YESTERDAY" '.[] | select(.date == $d) | .sharpness' "$SHARPNESS_FILE" 2>/dev/null | tail -1) || PREV_SHARPNESS=""
    # Only do integer arithmetic on values that really are integers; this also
    # covers the empty and "null" cases.
    if [[ "$PREV_SHARPNESS" =~ ^-?[0-9]+$ && "$SHARPNESS" =~ ^-?[0-9]+$ ]]; then
      DELTA=$(( SHARPNESS - PREV_SHARPNESS ))
      if (( DELTA > 0 )); then
        DELTA_STR=" (up ${DELTA}% from yesterday)"
      elif (( DELTA < 0 )); then
        DELTA_STR=" (down ${DELTA#-}% from yesterday)"
      else
        DELTA_STR=" (no change from yesterday)"
      fi
    fi
  fi
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Build new-memories section (tab-delimited: type TAB importance TAB content)
|
||||
# ---------------------------------------------------------------------------
|
||||
# new_section — print one formatted line per entry of the "---NEW---" section
# of $METRICS (rows are: type TAB importance TAB content), or " (none)" when
# the section is empty.
new_section() {
  # Loop variables are local so they do not leak into the rest of the script.
  local lines ntype importance content imp_fmt
  # Collect the lines between the ---NEW--- and ---REINFORCED--- markers.
  lines=$(printf '%s\n' "$METRICS" | awk '
    /^---REINFORCED---/ { done = 1 }
    done { next }
    /^---NEW---/ { found = 1; next }
    found { print }
  ')
  if [[ -z "$lines" ]]; then
    echo " (none)"
    return
  fi
  while IFS=$'\t' read -r ntype importance content; do
    [[ -z "$ntype" ]] && continue
    # Pass the value with -v: it comes from snapshot data and must not be
    # interpreted as awk program text.
    imp_fmt=$(awk -v v="$importance" 'BEGIN { printf "%.1f", v }')
    printf " [%-18s] (importance: %s) %s\n" "$ntype" "$imp_fmt" "$content"
  done <<< "$lines"
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Build reinforced section (tab-delimited: label TAB activation-info)
|
||||
# ---------------------------------------------------------------------------
|
||||
# reinforced_section — render the rows that follow the "---REINFORCED---"
# marker in $METRICS (label TAB activation info), one quoted label per line.
# Prints " (none today)" when there are no rows.
reinforced_section() {
  local lines
  # Keep every non-marker line that appears after the marker has been seen.
  lines=$(printf '%s\n' "$METRICS" | awk '
    seen && !/^---REINFORCED---/ { print }
    /^---REINFORCED---/ { seen = 1 }
  ')
  if [[ -n "$lines" ]]; then
    while IFS=$'\t' read -r label acts; do
      if [[ -n "$label" ]]; then
        printf " \"%s\" — %s\n" "$label" "$acts"
      fi
    done <<< "$lines"
  else
    echo " (none today)"
  fi
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Render full digest
|
||||
# ---------------------------------------------------------------------------
|
||||
DIGEST=$(cat <<EOF
|
||||
=== Neuron Cultivation Digest — ${DATE} ===
|
||||
|
||||
SHARPNESS: ${SHARPNESS}%${DELTA_STR}
|
||||
|
||||
TODAY'S MEMORIES (${NEW_COUNT} new):
|
||||
$(new_section)
|
||||
|
||||
REINFORCED (${REINFORCED_COUNT} nodes re-activated today):
|
||||
$(reinforced_section)
|
||||
|
||||
MEMORY HEALTH:
|
||||
Total nodes (all): ${TOTAL_NODES}
|
||||
Real memory nodes: ${TOTAL_REAL}
|
||||
Avg importance: ${AVG_IMP_FMT}
|
||||
Avg confidence: ${AVG_CONF_FMT}
|
||||
High-importance nodes (>0.8): ${HIGH_IMP}
|
||||
Nodes created today: ${NEW_COUNT}
|
||||
Nodes re-activated today: ${REINFORCED_COUNT}
|
||||
|
||||
SHARPNESS FORMULA:
|
||||
Sharpness = (avg_importance x 0.4) + (avg_confidence x 0.3) + (activation_ratio x 0.3)
|
||||
avg_importance = ${AVG_IMP_FMT} -> ${AVG_IMP_FMT} x 0.4 = ${IMP_CONTRIB}
|
||||
avg_confidence = ${AVG_CONF_FMT} -> ${AVG_CONF_FMT} x 0.3 = ${CONF_CONTRIB}
|
||||
activation_ratio = ${ACT_RATIO_FMT} -> ratio x 0.3 = ${ACT_CONTRIB}
|
||||
Result: ${SHARPNESS}%
|
||||
|
||||
Generated: $(date)
|
||||
EOF
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Write digest file + print to stdout
|
||||
# ---------------------------------------------------------------------------
|
||||
printf '%s\n' "$DIGEST" | tee "$DIGEST_FILE"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Append to sharpness.json
|
||||
# ---------------------------------------------------------------------------
|
||||
# Record today's entry in the sharpness log (a JSON array with one object per
# date; an existing entry for today is replaced).
NEW_ENTRY="{\"date\":\"${DATE}\",\"sharpness\":${SHARPNESS},\"node_count\":${TOTAL_NODES},\"real_node_count\":${TOTAL_REAL},\"nodes_added\":${NEW_COUNT},\"nodes_reinforced\":${REINFORCED_COUNT}}"

if [[ -s "$SHARPNESS_FILE" ]]; then
  if ! UPDATED=$(jq --arg d "$DATE" --argjson entry "$NEW_ENTRY" '
    map(select(.date != $d)) + [$entry]
  ' "$SHARPNESS_FILE" 2>/dev/null); then
    # The existing log could not be parsed. Keep a copy before starting a new
    # log so the history is not silently discarded.
    SHARPNESS_BACKUP="${SHARPNESS_FILE}.corrupt-$(date +%s)"
    cp "$SHARPNESS_FILE" "$SHARPNESS_BACKUP"
    echo "WARNING: could not parse $SHARPNESS_FILE; previous contents saved to $SHARPNESS_BACKUP" >&2
    UPDATED="[$NEW_ENTRY]"
  fi
else
  # No log yet (or an empty file): start a new array.
  UPDATED="[$NEW_ENTRY]"
fi

# Write to a temp file in the same directory and rename it into place, so an
# interrupted run cannot leave a truncated log behind.
SHARPNESS_TMP=$(mktemp "${SHARPNESS_FILE}.XXXXXX")
printf '%s\n' "$UPDATED" > "$SHARPNESS_TMP"
mv "$SHARPNESS_TMP" "$SHARPNESS_FILE"
|
||||
|
||||
echo ""
|
||||
echo "Digest written to: $DIGEST_FILE"
|
||||
echo "Sharpness log: $SHARPNESS_FILE"
|
||||
Executable
+162
@@ -0,0 +1,162 @@
|
||||
#!/usr/bin/env bash
|
||||
# memory-export.sh — Export Neuron engram store as a portable encrypted .neuronmem bundle
|
||||
#
|
||||
# Usage:
|
||||
# ./tools/memory-export.sh [output-path] [--passphrase "your passphrase"]
|
||||
#
|
||||
# If no passphrase is given, a random one is generated and printed — write it down.
|
||||
# If no output path is given, defaults to ./neuron-export-<timestamp>.neuronmem
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# ── Config ─────────────────────────────────────────────────────────────────────
|
||||
ENGRAM_SNAPSHOT="${HOME}/.neuron/engram/snapshot.json"
|
||||
SOUL_VERSION="1.1.0"
|
||||
FORMAT_VERSION="1"
|
||||
|
||||
# ── Parse args ─────────────────────────────────────────────────────────────────
|
||||
# Command-line parsing: one optional positional output path plus
# --passphrase VALUE / --passphrase=VALUE. PASSPHRASE_SET records whether the
# caller supplied a passphrase (an explicitly empty one still counts as set).
OUTPUT_PATH=""
PASSPHRASE=""
PASSPHRASE_SET=0

while [[ $# -gt 0 ]]; do
  case "$1" in
    --passphrase)
      # Without this check a trailing --passphrase trips `set -u` on "$2" and
      # the script dies with an unhelpful "unbound variable" message.
      if [[ $# -lt 2 ]]; then
        echo "ERROR: --passphrase requires a value." >&2
        echo "Usage: $0 [output-path] [--passphrase \"...\"]" >&2
        exit 1
      fi
      PASSPHRASE="$2"
      PASSPHRASE_SET=1
      shift 2
      ;;
    --passphrase=*)
      PASSPHRASE="${1#*=}"
      PASSPHRASE_SET=1
      shift
      ;;
    -*)
      echo "Unknown option: $1" >&2
      echo "Usage: $0 [output-path] [--passphrase \"...\"]" >&2
      exit 1
      ;;
    *)
      # First positional argument is the output path; any further one is an error.
      if [[ -z "$OUTPUT_PATH" ]]; then
        OUTPUT_PATH="$1"
      else
        echo "Unexpected argument: $1" >&2
        exit 1
      fi
      shift
      ;;
  esac
done
|
||||
|
||||
# ── Default output path ────────────────────────────────────────────────────────
|
||||
TIMESTAMP="$(date -u +"%Y%m%dT%H%M%SZ")"
|
||||
if [[ -z "$OUTPUT_PATH" ]]; then
|
||||
OUTPUT_PATH="./neuron-export-${TIMESTAMP}.neuronmem"
|
||||
fi
|
||||
|
||||
# Ensure .neuronmem extension
|
||||
# Append the suffix only when the path does not already end with it.
[[ "${OUTPUT_PATH}" == *.neuronmem ]] || OUTPUT_PATH="${OUTPUT_PATH}.neuronmem"
|
||||
|
||||
# ── Validate source ────────────────────────────────────────────────────────────
|
||||
if [[ ! -f "$ENGRAM_SNAPSHOT" ]]; then
|
||||
echo "ERROR: Engram snapshot not found at: $ENGRAM_SNAPSHOT" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Neuron Memory Export"
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo "Source: $ENGRAM_SNAPSHOT"
|
||||
echo "Output: $OUTPUT_PATH"
|
||||
echo ""
|
||||
|
||||
# ── Generate passphrase if not provided ────────────────────────────────────────
|
||||
if [[ $PASSPHRASE_SET -eq 0 ]]; then
|
||||
PASSPHRASE="$(openssl rand -base64 32)"
|
||||
echo "⚠ No passphrase provided. Generated passphrase:"
|
||||
echo ""
|
||||
echo " ${PASSPHRASE}"
|
||||
echo ""
|
||||
echo "⚠ WRITE THIS DOWN. You will need it to import this file."
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo ""
|
||||
fi
|
||||
|
||||
# ── Count nodes and edges ──────────────────────────────────────────────────────
|
||||
echo "Analyzing snapshot..."
|
||||
# Count the nodes in the snapshot. The snapshot path is passed as an argument
# (sys.argv[1]) rather than pasted into the Python source, so paths containing
# quotes work. Both snapshot shapes are handled: an object with a "nodes" list,
# or a bare top-level list. Falls back to "unknown" if python3 is missing or
# the file cannot be parsed.
NODE_COUNT="$(python3 -c '
import json, sys
with open(sys.argv[1]) as f:
    d = json.load(f)
nodes = d if isinstance(d, list) else d.get("nodes", [])
print(len(nodes))
' "$ENGRAM_SNAPSHOT" 2>/dev/null || echo "unknown")"
|
||||
|
||||
echo " Nodes: ${NODE_COUNT}"
|
||||
|
||||
# ── Compute checksum of source file ───────────────────────────────────────────
|
||||
echo "Computing checksum..."
|
||||
CHECKSUM="$(openssl dgst -sha256 "$ENGRAM_SNAPSHOT" | awk '{print $NF}')"
|
||||
echo " SHA256: ${CHECKSUM:0:16}..."
|
||||
|
||||
# ── Build bundle in temp dir ───────────────────────────────────────────────────
|
||||
WORK_DIR="$(mktemp -d)"
# The work dir holds an UNENCRYPTED copy of the snapshot. Remove it on every
# exit path (including failures under `set -e`), not only after a successful run.
trap 'rm -rf "$WORK_DIR"' EXIT
BUNDLE_DIR="${WORK_DIR}/neuronmem-v${FORMAT_VERSION}"
mkdir -p "$BUNDLE_DIR"
|
||||
|
||||
echo "Building bundle..."
|
||||
|
||||
# Copy snapshot as nodes.json
|
||||
cp "$ENGRAM_SNAPSHOT" "${BUNDLE_DIR}/nodes.json"
|
||||
|
||||
# Write metadata.json
|
||||
ISO_TIMESTAMP="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
# NODE_COUNT is the literal string "unknown" when counting failed; writing that
# unquoted would make metadata.json invalid JSON, so emit null in that case.
if [[ "$NODE_COUNT" =~ ^[0-9]+$ ]]; then
NODE_COUNT_JSON="$NODE_COUNT"
else
NODE_COUNT_JSON="null"
fi
cat > "${BUNDLE_DIR}/metadata.json" << METAEOF
{
"version": "${FORMAT_VERSION}",
"exported_at": "${ISO_TIMESTAMP}",
"node_count": ${NODE_COUNT_JSON},
"soul_version": "${SOUL_VERSION}",
"sha256": "${CHECKSUM}",
"format": "neuronmem-v1",
"encryption": "aes-256-cbc-pbkdf2",
"source_host": "$(hostname -s 2>/dev/null || echo unknown)"
}
METAEOF
|
||||
|
||||
echo " metadata.json written"
|
||||
echo " nodes.json copied ($(du -sh "${BUNDLE_DIR}/nodes.json" | cut -f1))"
|
||||
|
||||
# ── Create tar.gz ──────────────────────────────────────────────────────────────
|
||||
TAR_PATH="${WORK_DIR}/bundle.tar.gz"
|
||||
echo "Compressing..."
|
||||
(cd "$WORK_DIR" && tar czf "$TAR_PATH" "neuronmem-v${FORMAT_VERSION}/")
|
||||
COMPRESSED_SIZE="$(du -sh "$TAR_PATH" | cut -f1)"
|
||||
echo " Compressed size: ${COMPRESSED_SIZE}"
|
||||
|
||||
# ── Encrypt ────────────────────────────────────────────────────────────────────
|
||||
echo "Encrypting (AES-256-CBC, PBKDF2, 600k iterations)..."
# Hand the passphrase to openssl through its environment (-pass env:NAME)
# instead of "-pass pass:...": command-line arguments are visible to other
# users in the process list, the environment of the child process is not.
# The variable is set for this one command only.
NEURONMEM_PASSPHRASE="$PASSPHRASE" openssl enc -aes-256-cbc \
  -pbkdf2 \
  -iter 600000 \
  -salt \
  -in "$TAR_PATH" \
  -out "$OUTPUT_PATH" \
  -pass env:NEURONMEM_PASSPHRASE
|
||||
|
||||
# ── Cleanup ────────────────────────────────────────────────────────────────────
|
||||
rm -rf "$WORK_DIR"
|
||||
|
||||
# ── Report ─────────────────────────────────────────────────────────────────────
|
||||
FINAL_SIZE="$(du -sh "$OUTPUT_PATH" | cut -f1)"
|
||||
echo ""
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo "Export complete."
|
||||
echo " File: $OUTPUT_PATH"
|
||||
echo " Size: ${FINAL_SIZE}"
|
||||
echo " Nodes: ${NODE_COUNT}"
|
||||
echo " Checksum: ${CHECKSUM:0:32}..."
|
||||
echo " Timestamp: ${ISO_TIMESTAMP}"
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
Executable
+427
@@ -0,0 +1,427 @@
|
||||
#!/usr/bin/env bash
|
||||
# memory-import-refugee.sh — Import conversation/memory history from external apps into Neuron
|
||||
#
|
||||
# Usage:
|
||||
# ./tools/memory-import-refugee.sh --format chatgpt conversations.json
|
||||
# ./tools/memory-import-refugee.sh --format screenpipe screenpipe-export.json
|
||||
# ./tools/memory-import-refugee.sh --format generic data.json[l]
|
||||
#
|
||||
# Supported formats:
|
||||
# chatgpt — ChatGPT conversation export (conversations.json)
|
||||
# screenpipe — Screenpipe OCR export (frames array)
|
||||
# generic — Any JSON array or JSONL with content/text fields
|
||||
#
|
||||
# The script writes Memory nodes to the Neuron soul via its HTTP API.
|
||||
# The soul must be running on localhost:7770.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# ── Config ─────────────────────────────────────────────────────────────────────
|
||||
SOUL_HOST="http://localhost:7770"
|
||||
# Note: POST /api/neuron/memory ignores the label field (soul hardcodes "memory:remembered").
|
||||
# We embed the label in the content prefix so it is searchable.
|
||||
MEMORY_API="${SOUL_HOST}/api/neuron/memory"
|
||||
SLEEP_MS=100 # ms between API calls (rate limiting)
|
||||
|
||||
# ── Dependency check ───────────────────────────────────────────────────────────
|
||||
if ! command -v jq &>/dev/null; then
|
||||
echo "ERROR: jq is required but not installed." >&2
|
||||
echo "" >&2
|
||||
echo "Install it with:" >&2
|
||||
echo " macOS: brew install jq" >&2
|
||||
echo " Ubuntu: sudo apt-get install jq" >&2
|
||||
echo " Alpine: apk add jq" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# ── Parse args ─────────────────────────────────────────────────────────────────
|
||||
# Command-line parsing: --format/-f VALUE (or --format=VALUE) plus exactly one
# positional input file. Validation of the collected values happens afterwards.
FORMAT=""
INPUT_FILE=""

while [[ $# -gt 0 ]]; do
  case "$1" in
    --format|-f)
      # Without this check a trailing --format trips `set -u` on "$2" and the
      # script dies with an unhelpful "unbound variable" message.
      if [[ $# -lt 2 ]]; then
        echo "ERROR: $1 requires a value." >&2
        echo "Usage: $0 --format <chatgpt|screenpipe|generic> <input-file>" >&2
        exit 1
      fi
      FORMAT="$2"
      shift 2
      ;;
    --format=*|-f=*)
      FORMAT="${1#*=}"
      shift
      ;;
    -*)
      echo "Unknown option: $1" >&2
      echo "Usage: $0 --format <chatgpt|screenpipe|generic> <input-file>" >&2
      exit 1
      ;;
    *)
      # First positional argument is the input file; any further one is an error.
      if [[ -z "$INPUT_FILE" ]]; then
        INPUT_FILE="$1"
      else
        echo "Unexpected argument: $1" >&2
        exit 1
      fi
      shift
      ;;
  esac
done
|
||||
|
||||
if [[ -z "$FORMAT" ]]; then
|
||||
echo "ERROR: --format is required." >&2
|
||||
echo "Usage: $0 --format <chatgpt|screenpipe|generic> <input-file>" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ -z "$INPUT_FILE" ]]; then
|
||||
echo "ERROR: No input file specified." >&2
|
||||
echo "Usage: $0 --format <chatgpt|screenpipe|generic> <input-file>" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ ! -f "$INPUT_FILE" ]]; then
|
||||
echo "ERROR: Input file not found: $INPUT_FILE" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
case "$FORMAT" in
|
||||
chatgpt|screenpipe|generic) ;;
|
||||
*)
|
||||
echo "ERROR: Unknown format: $FORMAT" >&2
|
||||
echo "Supported formats: chatgpt, screenpipe, generic" >&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
# ── Soul health check ──────────────────────────────────────────────────────────
|
||||
HTTP_CODE="$(curl -s -o /dev/null -w "%{http_code}" "${SOUL_HOST}/api/neuron/memory" 2>/dev/null || echo "000")"
|
||||
if [[ "$HTTP_CODE" == "000" ]]; then
|
||||
echo "ERROR: Neuron soul is not responding at ${SOUL_HOST}." >&2
|
||||
echo " Start the soul service and retry." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# ── Counters ───────────────────────────────────────────────────────────────────
|
||||
IMPORTED=0
|
||||
SKIPPED=0
|
||||
ERRORS=0
|
||||
|
||||
# ── Helper: post one memory node ───────────────────────────────────────────────
|
||||
# post_memory CONTENT LABEL TAGS_JSON
|
||||
#
|
||||
# Note: the soul's POST /api/neuron/memory API ignores the label field (hardcodes
|
||||
# it to "memory:remembered"). We embed the label as a prefix in the content so
|
||||
# the title remains searchable via recall/search.
|
||||
# post_memory CONTENT LABEL TAGS_JSON
#
# POSTs one memory to $MEMORY_API and updates the global counters:
#   SKIPPED  — CONTENT was empty or the literal "null" (no request is made)
#   IMPORTED — the response contained "ok": true
#   ERRORS   — anything else (payload could not be built, curl failed,
#              non-JSON or non-ok response)
# Always returns 0 so one bad item never aborts the whole import under `set -e`.
post_memory() {
  local content="$1"
  local label="$2"
  local tags_json="$3"

  # Skip empty content
  if [[ -z "$content" || "$content" == "null" ]]; then
    SKIPPED=$((SKIPPED + 1))
    return 0
  fi

  # Embed label in content so it's searchable (the API ignores the label field)
  local full_content="[${label}] ${content}"

  local payload
  if ! payload="$(jq -n \
      --arg content "$full_content" \
      --arg label "$label" \
      --argjson tags "$tags_json" \
      '{content: $content, label: $label, tags: $tags}' 2>/dev/null)"; then
    ERRORS=$((ERRORS + 1))
    echo " [ERROR] Could not build JSON payload for label \"${label:0:60}\"" >&2
    return 0
  fi

  # The body is streamed on stdin (--data-binary @-) so very long messages do
  # not hit the OS argument-length limit. A curl failure (connection refused,
  # timeout, ...) is turned into an empty response and counted below instead
  # of terminating the script.
  local response
  response="$(printf '%s' "$payload" | curl -s -X POST "$MEMORY_API" \
    -H "Content-Type: application/json" \
    --data-binary @- 2>/dev/null)" || response=""

  # A non-JSON response makes jq fail; treat that as "not ok".
  local ok
  ok="$(jq -r '.ok // "false"' <<< "$response" 2>/dev/null)" || ok="false"

  if [[ "$ok" == "true" ]]; then
    IMPORTED=$((IMPORTED + 1))
  else
    ERRORS=$((ERRORS + 1))
    echo " [ERROR] API error for label \"${label:0:60}\": $response" >&2
  fi

  # Rate limit: convert SLEEP_MS to seconds. (Pasting the number after "0."
  # is only correct for exactly three digits: 50 would become 0.50 s.)
  sleep "$(LC_ALL=C awk -v ms="$SLEEP_MS" 'BEGIN { printf "%.3f", ms / 1000 }')"
}
|
||||
|
||||
# ── Format: ChatGPT ────────────────────────────────────────────────────────────
|
||||
# import_chatgpt — import every non-empty user message from a ChatGPT
# conversations.json export ($INPUT_FILE) as one memory each, labelled with the
# conversation title (truncated to 100 characters).
import_chatgpt() {
  echo "Format: ChatGPT conversation export"

  # Validate: must be JSON array at top level. A jq failure (invalid JSON) must
  # reach the error message below instead of ending the script via `set -e`.
  local top_type
  top_type="$(jq -r 'type' "$INPUT_FILE" 2>/dev/null)" || top_type=""
  if [[ "$top_type" != "array" ]]; then
    echo "ERROR: ChatGPT export must be a JSON array of conversations." >&2
    exit 1
  fi

  local conv_count
  conv_count="$(jq 'length' "$INPUT_FILE")"
  echo "Found ${conv_count} conversation(s) to process."
  echo ""

  # Count total user messages for progress display
  local total_msgs
  total_msgs="$(jq '[.[].mapping // {} | to_entries[] | .value.message | select(. != null and .author.role == "user") | .content.parts // [] | .[] | select(type == "string" and length > 0)] | length' "$INPUT_FILE" 2>/dev/null || echo "?")"
  echo "Total user messages: ${total_msgs}"
  echo ""

  local msg_idx=0
  local conv_json title label msg_json msg_text

  # Process each conversation (one compact JSON object per line)
  while IFS= read -r conv_json; do
    title="$(jq -r '.title // "Untitled"' <<< "$conv_json")"

    # Truncate label to 100 chars
    label="${title:0:100}"

    # ChatGPT export uses a mapping dict structure:
    #   { uuid: { id, message: { author: { role }, content: { parts: [...] } }, ... } }
    # Iterate over mapping values, keep role=user, take the string parts.
    #
    # Each part is emitted as a JSON-encoded string (jq -c), i.e. exactly one
    # line per message, and decoded again below. Emitting raw text (-r) would
    # split a multi-line message into one memory per line.
    while IFS= read -r msg_json; do
      msg_text="$(jq -r '.' <<< "$msg_json")"
      msg_idx=$((msg_idx + 1))
      echo " Importing ${msg_idx}/${total_msgs}..."
      post_memory "$msg_text" "$label" '["chatgpt-import","conversation"]'
    done < <(jq -c '
      .mapping // {} |
      to_entries[] |
      .value.message |
      select(. != null) |
      select(.author.role == "user") |
      .content.parts // [] |
      .[] |
      select(type == "string" and length > 0)
    ' <<< "$conv_json" 2>/dev/null)

  done < <(jq -c '.[]' "$INPUT_FILE")
}
|
||||
|
||||
# ── Format: Screenpipe ─────────────────────────────────────────────────────────
|
||||
import_screenpipe() {
  # Import a Screenpipe OCR export.
  #
  # Frames with non-empty text are grouped per application into 5-minute
  # windows; each group becomes one memory whose content is the group's texts
  # joined by spaces (capped at 2000 characters).
  #
  # Reads:   INPUT_FILE — JSON object with a 'frames' array of entries that
  #          carry app_name, timestamp (epoch seconds or ISO-8601) and text.
  # Calls:   post_memory <content> <label> <tags-json> once per group.
  # Updates: ERRORS when the grouping step itself fails.
  echo "Format: Screenpipe OCR export"

  # Validate: the export must be a single JSON object.
  # `|| true` keeps a jq parse failure from aborting a `set -e` script before
  # the explanatory error below is printed.
  local top_type
  top_type="$(jq -r 'type' "$INPUT_FILE" 2>/dev/null || true)"
  if [[ "$top_type" != "object" ]]; then
    echo "ERROR: Screenpipe export must be a JSON object with a 'frames' array." >&2
    exit 1
  fi

  local frame_count
  frame_count="$(jq '.frames | length' "$INPUT_FILE" 2>/dev/null || echo "0")"
  echo "Found ${frame_count} frame(s) to process."

  if [[ "$frame_count" == "0" ]]; then
    echo "No frames found. Nothing to import."
    return 0
  fi

  # Grouping is done once, in jq, emitting one {app, ts, texts} object per line.
  # NOTE: this string is single-quoted for the shell, so the jq program (and
  # its comments) must not contain apostrophes.
  local group_filter='
    .frames
    | map(select(.text != null and (.text | length) > 0))
    | group_by(.app_name)
    | .[]
    | (.[0].app_name) as $app
    | sort_by(.timestamp)
    | reduce .[] as $f (
        {bucket: null, texts: [], ts: null, groups: []};
        # Timestamps may be numbers or strings; normalise to a string first so
        # that test() never sees a number.
        (($f.timestamp // "") | tostring) as $ts
        | (if ($ts | test("^[0-9]+$")) then
             # Epoch seconds: 300-second buckets.
             ($ts | tonumber / 300 | floor | tostring)
           else
             # ISO-8601: "YYYY-MM-DDTHH:" plus floor(minute / 5); anything that
             # does not parse falls back to a plain prefix key.
             (try ($ts[0:14] + ($ts[14:16] | tonumber / 5 | floor | tostring))
              catch $ts[0:15])
           end) as $bucket
        | if .bucket == null then
            {bucket: $bucket, texts: [$f.text], ts: $ts, groups: .groups}
          elif .bucket == $bucket then
            {bucket: $bucket, texts: (.texts + [$f.text]), ts: $ts, groups: .groups}
          else
            {bucket: $bucket, texts: [$f.text], ts: $ts,
             groups: (.groups + [{app: $app, ts: .ts, texts: .texts}])}
          end
      )
    # Flush the bucket that was still open when the frames ran out.
    | (.groups + [{app: $app, ts: .ts, texts: .texts}])
    | .[]
    | select(.texts | length > 0)
  '

  local groups_ndjson
  if ! groups_ndjson="$(jq -c "$group_filter" "$INPUT_FILE")"; then
    echo "ERROR: Failed to group Screenpipe frames (see jq message above)." >&2
    ERRORS=$((ERRORS + 1))
    return 0
  fi

  # Count first so progress can be reported as N/total like the other importers.
  local group_json
  local total_groups=0
  while IFS= read -r group_json; do
    [[ -n "$group_json" ]] || continue
    total_groups=$((total_groups + 1))
  done <<< "$groups_ndjson"

  if [[ "$total_groups" -eq 0 ]]; then
    echo "No frames with text found. Nothing to import."
    return 0
  fi

  local group_idx=0
  local app_name ts_str content label tags_json
  while IFS= read -r group_json; do
    [[ -n "$group_json" ]] || continue

    group_idx=$((group_idx + 1))
    echo " Importing ${group_idx}/${total_groups}..."

    app_name="$(jq -r '.app // "unknown"' <<< "$group_json")"
    ts_str="$(jq -r '.ts // ""' <<< "$group_json")"

    # Concatenate texts, then cap the whole string at 2000 characters
    # (`cut -c` would cap each line separately when OCR text has newlines).
    content="$(jq -r '.texts | join(" ")' <<< "$group_json")"
    content="${content:0:2000}"
    label="Screenpipe: ${app_name} at ${ts_str:0:16}"

    tags_json="$(jq -n --arg app "$app_name" '["screenpipe-import","screen-capture",$app]')"

    post_memory "$content" "$label" "$tags_json"
  done <<< "$groups_ndjson"
}
|
||||
|
||||
# ── Format: Generic ────────────────────────────────────────────────────────────
|
||||
import_generic() {
  # Import generic JSON records: a JSON array, a single object (compact or
  # pretty-printed), or JSONL with one value per line.
  #
  # For each record the content is the first string among
  # content/text/body/message, and the label is the first string among
  # title/label/name (falling back to the first 80 characters of the content).
  #
  # Reads:   INPUT_FILE.
  # Calls:   post_memory <content> <label> <tags-json> once per usable record.
  # Updates: SKIPPED for records without usable content.
  echo "Format: Generic JSON/JSONL"

  local records_file
  records_file="$(mktemp)"
  trap 'rm -f "$records_file"' RETURN

  # Normalise to one compact JSON value per line. jq reads a stream of values,
  # so this covers arrays, single objects (even multi-line) and JSONL alike.
  if ! jq -c 'if type == "array" then .[] else . end' "$INPUT_FILE" > "$records_file" 2>/dev/null; then
    # Not parseable as a whole: fall back to raw line-by-line handling so that
    # individual bad lines are skipped instead of losing everything after them.
    cp "$INPUT_FILE" "$records_file"
  fi

  # Count non-empty lines (also counts a final line with no trailing newline).
  local total_records
  total_records="$(awk 'length($0) > 0 { n++ } END { print n + 0 }' "$records_file")"
  echo "Found ${total_records} record(s) to process."
  echo ""

  local idx=0
  local record_json content label
  # `|| [[ -n ... ]]` keeps the last line when the file lacks a final newline.
  while IFS= read -r record_json || [[ -n "$record_json" ]]; do
    [[ -z "$record_json" ]] && continue

    idx=$((idx + 1))
    echo " Importing ${idx}/${total_records}..."

    # Extract content: prefer 'content', fall back to 'text', then 'body',
    # then 'message'. `|| true` turns an unparsable line into a skipped record
    # rather than aborting the whole import under `set -e`.
    content="$(jq -r '
      [.content, .text, .body, .message]
      | map(select(type == "string"))
      | .[0] // ""
    ' <<< "$record_json" 2>/dev/null || true)"

    if [[ -z "$content" || "$content" == "null" ]]; then
      SKIPPED=$((SKIPPED + 1))
      continue
    fi

    # Extract label: prefer 'title', then 'label', then 'name', then the first
    # 80 chars of content.
    label="$(jq -r '
      [.title, .label, .name]
      | map(select(type == "string"))
      | .[0] // ""
    ' <<< "$record_json" 2>/dev/null || true)"

    if [[ -z "$label" || "$label" == "null" ]]; then
      label="${content:0:80}"
    fi
    label="${label:0:100}"

    post_memory "$content" "$label" '["imported","generic"]'
  done < "$records_file"
}
|
||||
|
||||
# ── Main ───────────────────────────────────────────────────────────────────────
# Print the run banner, dispatch to the importer for the selected format, then
# print the counters and exit non-zero if any record failed.
printf '%s\n' \
  "Neuron Refugee Importer" \
  "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" \
  "Source: $INPUT_FILE" \
  "Format: $FORMAT" \
  "Soul: $SOUL_HOST" \
  ""

case "$FORMAT" in
  chatgpt)
    import_chatgpt
    ;;
  screenpipe)
    import_screenpipe
    ;;
  generic)
    import_generic
    ;;
esac

# ── Final report ───────────────────────────────────────────────────────────────
printf '%s\n' \
  "" \
  "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" \
  "Import complete." \
  " Imported: ${IMPORTED}" \
  " Skipped: ${SKIPPED}" \
  " Errors: ${ERRORS}" \
  "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

# A non-zero error count becomes a failing exit status for callers.
if (( ERRORS > 0 )); then
  exit 1
fi
|
||||
Executable
+289
@@ -0,0 +1,289 @@
|
||||
#!/usr/bin/env bash
|
||||
# memory-import.sh — Import a Neuron .neuronmem bundle onto this device
|
||||
#
|
||||
# Usage:
|
||||
# ./tools/memory-import.sh input.neuronmem [--passphrase "your passphrase"]
|
||||
# ./tools/memory-import.sh input.neuronmem [--dry-run] # verify only, no changes
|
||||
#
|
||||
# The script will:
|
||||
# 1. Decrypt and unpack the .neuronmem file
|
||||
# 2. Validate the checksum and version
|
||||
# 3. Back up the current snapshot.json
|
||||
# 4. Stop the soul service
|
||||
# 5. Replace snapshot.json
|
||||
# 6. Restart the soul service
|
||||
# 7. Verify the soul came back up
|
||||
|
||||
set -euo pipefail

# ── Config ─────────────────────────────────────────────────────────────────────
ENGRAM_SNAPSHOT="${HOME}/.neuron/engram/snapshot.json"
SOUL_SERVICE="ai.neurontechnologies.soul"
SOUL_PORT="7770"
SOUL_STARTUP_TIMEOUT=30 # seconds to wait for soul to come back

# ── Parse args ─────────────────────────────────────────────────────────────────
# Results:
#   INPUT_PATH      first positional argument (the .neuronmem file)
#   PASSPHRASE      value of --passphrase / --passphrase=...
#   PASSPHRASE_SET  1 when a passphrase was given on the command line
#   DRY_RUN         1 when --dry-run was given
INPUT_PATH=""
PASSPHRASE=""
PASSPHRASE_SET=0
DRY_RUN=0

# Print the one-line usage summary to stderr.
_print_usage() {
  echo "Usage: $0 input.neuronmem [--passphrase \"...\"] [--dry-run]" >&2
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --passphrase)
      # Without this guard, `--passphrase` as the last argument trips
      # `set -u` on "$2" and dies with a cryptic "unbound variable" message.
      if [[ $# -lt 2 ]]; then
        echo "ERROR: --passphrase requires a value." >&2
        _print_usage
        exit 1
      fi
      PASSPHRASE="$2"
      PASSPHRASE_SET=1
      shift 2
      ;;
    --passphrase=*)
      PASSPHRASE="${1#*=}"
      PASSPHRASE_SET=1
      shift
      ;;
    --dry-run)
      DRY_RUN=1
      shift
      ;;
    -*)
      echo "Unknown option: $1" >&2
      _print_usage
      exit 1
      ;;
    *)
      # Only one positional argument (the bundle path) is accepted.
      if [[ -z "$INPUT_PATH" ]]; then
        INPUT_PATH="$1"
      else
        echo "Unexpected argument: $1" >&2
        exit 1
      fi
      shift
      ;;
  esac
done
|
||||
|
||||
# Pre-flight: an input file must be named and must exist.
[[ -n "$INPUT_PATH" ]] || {
  echo "ERROR: No input file specified." >&2
  echo "Usage: $0 input.neuronmem [--passphrase \"...\"] [--dry-run]" >&2
  exit 1
}

[[ -f "$INPUT_PATH" ]] || {
  echo "ERROR: Input file not found: $INPUT_PATH" >&2
  exit 1
}

# Banner describing what is about to happen.
printf '%s\n' \
  "Neuron Memory Import" \
  "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" \
  "Source: $INPUT_PATH" \
  "Target: $ENGRAM_SNAPSHOT"
if (( DRY_RUN == 1 )); then
  echo "Mode: DRY RUN (no changes will be made)"
fi
echo ""

# ── Prompt for passphrase if needed ───────────────────────────────────────────
# Only prompts when no passphrase came from the command line; an empty answer
# at the prompt is rejected.
if (( PASSPHRASE_SET == 0 )); then
  read -r -s -p "Enter passphrase: " PASSPHRASE
  echo ""
  [[ -n "$PASSPHRASE" ]] || {
    echo "ERROR: Passphrase cannot be empty." >&2
    exit 1
  }
fi
|
||||
|
||||
# ── Decrypt to temp dir ────────────────────────────────────────────────────────
# All intermediate files live in WORK_DIR, which is removed on any exit.
WORK_DIR="$(mktemp -d)"
CLEANUP() {
  rm -rf "$WORK_DIR"
}
trap CLEANUP EXIT

TAR_PATH="${WORK_DIR}/bundle.tar.gz"

echo "Decrypting..."
# The passphrase is handed to openssl on file descriptor 3 instead of as
# `-pass pass:...`, so it never appears in openssl's argv (visible to other
# users via `ps`). openssl reads the first line of the descriptor as the
# passphrase.
# NOTE(review): a passphrase containing a newline would be cut at that newline.
if ! openssl enc -d -aes-256-cbc \
    -pbkdf2 \
    -iter 600000 \
    -in "$INPUT_PATH" \
    -out "$TAR_PATH" \
    -pass fd:3 3<<< "$PASSPHRASE" 2>/dev/null; then
  echo "ERROR: Decryption failed. Wrong passphrase or corrupted file." >&2
  exit 1
fi
echo " Decrypted successfully."
|
||||
|
||||
# ── Unpack ─────────────────────────────────────────────────────────────────────
# Extract the decrypted tarball inside WORK_DIR, then find the versioned bundle
# directory and make sure both expected files are present.
echo "Unpacking..."
if ! (cd "$WORK_DIR" && tar xzf "$TAR_PATH"); then
  echo "ERROR: Failed to unpack bundle. File may be corrupted." >&2
  exit 1
fi

# Locate the bundle directory (neuronmem-v1/): first match of the glob wins.
# An unmatched glob stays literal and is rejected by the -d test.
BUNDLE_DIR=""
for candidate in "${WORK_DIR}"/neuronmem-v*/; do
  [[ -d "$candidate" ]] || continue
  BUNDLE_DIR="$candidate"
  break
done

[[ -n "$BUNDLE_DIR" ]] || {
  echo "ERROR: Bundle directory not found. Invalid .neuronmem file." >&2
  exit 1
}

# BUNDLE_DIR keeps the trailing slash from the glob, so no separator is added.
METADATA_FILE="${BUNDLE_DIR}metadata.json"
NODES_FILE="${BUNDLE_DIR}nodes.json"

[[ -f "$METADATA_FILE" ]] || {
  echo "ERROR: metadata.json missing from bundle." >&2
  exit 1
}

[[ -f "$NODES_FILE" ]] || {
  echo "ERROR: nodes.json missing from bundle." >&2
  exit 1
}
|
||||
|
||||
# ── Validate metadata ──────────────────────────────────────────────────────────
echo "Validating metadata..."

# Print one top-level field of metadata.json, or '?' when the key is absent.
# The path and key travel as argv rather than being spliced into the Python
# source, so a quote in the temp-dir path cannot break or alter the program.
#   $1 — key name
_metadata_field() {
  python3 -c '
import json, sys
with open(sys.argv[1]) as fh:
    meta = json.load(fh)
print(meta.get(sys.argv[2], "?"))
' "$METADATA_FILE" "$1"
}

# The first read doubles as the "is this valid metadata at all" check.
if ! FORMAT_VERSION="$(_metadata_field version)"; then
  echo "ERROR: metadata.json is not a valid JSON object. The bundle may be corrupted." >&2
  exit 1
fi
EXPORTED_AT="$(_metadata_field exported_at)"
EXPECTED_COUNT="$(_metadata_field node_count)"
STORED_CHECKSUM="$(_metadata_field sha256)"
SOURCE_HOST="$(_metadata_field source_host)"

echo " Format version: ${FORMAT_VERSION}"
echo " Exported at: ${EXPORTED_AT}"
echo " Source host: ${SOURCE_HOST}"
echo " Expected nodes: ${EXPECTED_COUNT}"

if [[ "$FORMAT_VERSION" != "1" ]]; then
  echo "ERROR: Unsupported bundle format version: ${FORMAT_VERSION}" >&2
  echo " This tool supports version 1 only." >&2
  exit 1
fi
|
||||
|
||||
# ── Validate checksum ──────────────────────────────────────────────────────────
# Compare the SHA-256 of nodes.json with the digest recorded in metadata.json.
echo "Verifying checksum..."
# The digest is the last whitespace-separated field of openssl's output line.
ACTUAL_CHECKSUM="$(openssl dgst -sha256 "$NODES_FILE" | awk '{print $NF}')"

if [[ "$ACTUAL_CHECKSUM" == "$STORED_CHECKSUM" ]]; then
  echo " Checksum OK: ${ACTUAL_CHECKSUM:0:16}..."
else
  {
    echo "ERROR: Checksum mismatch!"
    echo " Expected: ${STORED_CHECKSUM}"
    echo " Got: ${ACTUAL_CHECKSUM}"
    echo " The bundle may be corrupted."
  } >&2
  exit 1
fi
|
||||
|
||||
# ── Verify node count ──────────────────────────────────────────────────────────
# Count the nodes in nodes.json and compare with metadata's node_count.
# A mismatch is only a warning; an uncountable file yields "unknown".
echo "Verifying node count..."
# nodes.json may be a bare list or an object with a "nodes" member. The type
# is checked BEFORE calling .get(), which a list does not have. The path is
# passed as argv, not spliced into the Python source.
ACTUAL_COUNT="$(python3 -c '
import json, sys
with open(sys.argv[1]) as fh:
    data = json.load(fh)
nodes = data if isinstance(data, list) else data.get("nodes", [])
print(len(nodes))
' "$NODES_FILE" 2>/dev/null || echo "unknown")"

echo " Found ${ACTUAL_COUNT} nodes (expected ${EXPECTED_COUNT})"

# Only compare when both sides are known: ACTUAL_COUNT is "unknown" when the
# count failed, and EXPECTED_COUNT is "?" when metadata has no node_count.
if [[ "$ACTUAL_COUNT" == "unknown" ]]; then
  echo "WARNING: Could not count nodes in the bundle; skipping count verification." >&2
elif [[ "$EXPECTED_COUNT" != "?" && "$ACTUAL_COUNT" != "$EXPECTED_COUNT" ]]; then
  echo "WARNING: Node count mismatch (expected ${EXPECTED_COUNT}, found ${ACTUAL_COUNT})." >&2
  echo " Proceeding anyway — count may differ if nodes were deduplicated." >&2
fi
|
||||
|
||||
# ── Dry run exit ───────────────────────────────────────────────────────────────
# In dry-run mode the bundle has been fully validated by now; report and stop
# before anything on disk is touched.
if (( DRY_RUN == 1 )); then
  printf '%s\n' \
    "" \
    "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" \
    "DRY RUN complete. Bundle is valid." \
    " Nodes: ${ACTUAL_COUNT}" \
    " Checksum: verified" \
    " Run without --dry-run to import." \
    "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  exit 0
fi

# ── Safety confirmation ────────────────────────────────────────────────────────
# Anything other than the exact word "yes" aborts with a zero exit status.
printf '%s\n' \
  "" \
  "WARNING: This will replace your current Neuron memory store." \
  " Current snapshot: $ENGRAM_SNAPSHOT" \
  " A backup will be created before replacing." \
  ""
read -r -p "Type 'yes' to continue: " CONFIRM
case "$CONFIRM" in
  yes)
    ;;
  *)
    echo "Aborted."
    exit 0
    ;;
esac
|
||||
|
||||
# ── Backup existing snapshot ───────────────────────────────────────────────────
# Preferred backup is a tar.gz under ~/.neuron; if tar fails, fall back to a
# plain copy next to the snapshot. BACKUP_PATH always ends up naming what was
# really written, because the final report prints it.
BACKUP_TIMESTAMP="$(date -u +"%Y%m%dT%H%M%SZ")"
ENGRAM_DIR="$(dirname "$ENGRAM_SNAPSHOT")"
BACKUP_PATH="${HOME}/.neuron/engram-backup-${BACKUP_TIMESTAMP}.tar.gz"

echo ""
echo "Backing up current snapshot..."
if [[ -f "$ENGRAM_SNAPSHOT" ]]; then
  if ! (cd "$HOME/.neuron" && tar czf "$BACKUP_PATH" "$(basename "$ENGRAM_DIR")/snapshot.json" 2>/dev/null); then
    # Do not leave a truncated archive behind that looks like a valid backup.
    rm -f "$BACKUP_PATH"
    BACKUP_PATH="${ENGRAM_SNAPSHOT}.backup-${BACKUP_TIMESTAMP}"
    if ! cp "$ENGRAM_SNAPSHOT" "$BACKUP_PATH"; then
      echo "ERROR: Could not back up the current snapshot; nothing has been changed." >&2
      exit 1
    fi
  fi
  echo " Backup: $BACKUP_PATH"
else
  BACKUP_PATH="(none - no existing snapshot)"
  echo " No existing snapshot to back up."
fi
|
||||
|
||||
# ── Stop soul service ──────────────────────────────────────────────────────────
echo "Stopping soul service (${SOUL_SERVICE})..."
launchctl stop "$SOUL_SERVICE" 2>/dev/null || true
# Also stop engram service if running
launchctl stop "ai.neuron.engram" 2>/dev/null || true
sleep 2
echo " Soul stopped."

# ── Replace snapshot.json ──────────────────────────────────────────────────────
# Copy to a temp name in the target directory and rename into place, so a
# failed or interrupted copy never leaves a half-written snapshot.json. The
# result is recorded rather than letting `set -e` abort here, because the
# services must be restarted even when the install fails.
echo "Installing new snapshot..."
SNAPSHOT_TMP="${ENGRAM_SNAPSHOT}.importing.$$"
INSTALL_OK=0
if mkdir -p "$(dirname "$ENGRAM_SNAPSHOT")" \
    && cp "$NODES_FILE" "$SNAPSHOT_TMP" \
    && mv -f "$SNAPSHOT_TMP" "$ENGRAM_SNAPSHOT"; then
  INSTALL_OK=1
  echo " snapshot.json replaced ($(du -sh "$ENGRAM_SNAPSHOT" | cut -f1))"
else
  rm -f "$SNAPSHOT_TMP"
fi

# ── Restart soul service ───────────────────────────────────────────────────────
echo "Restarting soul service..."
launchctl start "$SOUL_SERVICE" 2>/dev/null || true
launchctl start "ai.neuron.engram" 2>/dev/null || true

if [[ $INSTALL_OK -ne 1 ]]; then
  echo "ERROR: Failed to install the new snapshot; the previous snapshot.json was left in place." >&2
  exit 1
fi
|
||||
|
||||
# ── Wait for soul to come up ───────────────────────────────────────────────────
# Poll until the soul answers HTTP on SOUL_PORT or SOUL_STARTUP_TIMEOUT seconds
# have passed. Failure to come up is reported as a warning only.
echo "Waiting for soul to come up on port ${SOUL_PORT}..."
ELAPSED=0
SOUL_UP=0
WAIT_START=$SECONDS
while [[ $ELAPSED -lt $SOUL_STARTUP_TIMEOUT ]]; do
  # --max-time bounds each probe so a hung connection cannot stall the loop.
  if curl -sf --max-time 2 "http://localhost:${SOUL_PORT}/" > /dev/null 2>&1; then
    SOUL_UP=1
    break
  fi
  # Try a known endpoint that returns any response (even 404 means it's up).
  # On a connection failure curl itself prints "000" AND exits non-zero, so the
  # fallback must be `|| true`: `|| echo "000"` would append a second "000" and
  # the result would never equal "000".
  HTTP_CODE="$(curl -s --max-time 2 -o /dev/null -w "%{http_code}" "http://localhost:${SOUL_PORT}/api/neuron/memory" 2>/dev/null || true)"
  if [[ -n "$HTTP_CODE" && "$HTTP_CODE" != "000" ]]; then
    SOUL_UP=1
    break
  fi
  sleep 1
  # Wall-clock elapsed time, so probe duration counts against the timeout.
  ELAPSED=$((SECONDS - WAIT_START))
done

if [[ $SOUL_UP -eq 1 ]]; then
  ELAPSED=$((SECONDS - WAIT_START))
  echo " Soul is up (responded in ${ELAPSED}s)."
else
  echo " WARNING: Soul did not respond within ${SOUL_STARTUP_TIMEOUT}s."
  echo " The service may still be starting. Check: launchctl list | grep soul"
fi
|
||||
|
||||
# ── Final report ───────────────────────────────────────────────────────────────
# Summary of what was imported and where the previous snapshot was saved.
printf '%s\n' \
  "" \
  "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" \
  "Import complete." \
  " Nodes imported: ${ACTUAL_COUNT}" \
  " Exported at: ${EXPORTED_AT}" \
  " Source host: ${SOURCE_HOST}" \
  " Backup: ${BACKUP_PATH}" \
  "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
Executable
+135
@@ -0,0 +1,135 @@
|
||||
#!/usr/bin/env bash
|
||||
# photo-to-memory.sh — OCR a document/photo and store the text in Neuron memory
|
||||
#
|
||||
# Uses GLM-OCR (0.9B, MIT) via mlx-vlm on Apple Silicon.
|
||||
# Model auto-downloads ~1.59 GB to ~/.cache/huggingface/ on first run.
|
||||
#
|
||||
# Usage:
|
||||
# ./tools/photo-to-memory.sh <image-file> [--dry-run] [--prompt "custom prompt"]
|
||||
#
|
||||
# Prerequisites:
|
||||
# pip install -U mlx-vlm
|
||||
#
|
||||
# Examples:
|
||||
# ./tools/photo-to-memory.sh ~/Desktop/receipt.jpg
|
||||
# ./tools/photo-to-memory.sh ~/Documents/contract.png --dry-run
|
||||
# ./tools/photo-to-memory.sh scan.jpg --prompt "Extract all text from this receipt"
|
||||
|
||||
set -euo pipefail

# ── Config ─────────────────────────────────────────────────────────────────────
# Each setting keeps a non-empty value from the environment, else the default.
: "${SOUL_URL:=http://localhost:7770}"
: "${GLM_MODEL:=mlx-community/GLM-OCR-8bit}"
: "${MAX_TOKENS:=4096}"
DEFAULT_PROMPT="Extract all text from this document. Preserve structure including tables, headers, and lists. Output plain text."

# ── Colours ────────────────────────────────────────────────────────────────────
RED=$'\033[0;31m'
GREEN=$'\033[0;32m'
YELLOW=$'\033[1;33m'
CYAN=$'\033[0;36m'
BOLD=$'\033[1m'
RESET=$'\033[0m'

# Print "<colour><prefix><message><reset>" followed by a newline.
#   $1 — colour escape, $2 — prefix (may be empty), rest — message words
_emit() {
  local colour="$1" prefix="$2"
  shift 2
  printf '%s%s%s%s\n' "$colour" "$prefix" "$*" "$RESET"
}

# Informational line (cyan).
log() { _emit "$CYAN" "" "$@"; }
# Success line (green, check mark).
ok() { _emit "$GREEN" "✓ " "$@"; }
# Warning line (yellow).
warn() { _emit "$YELLOW" "⚠ " "$@"; }
# Error line on stderr (red), then exit with status 1.
die() {
  _emit "$RED" "✗ " "$@" >&2
  exit 1
}
|
||||
|
||||
# ── Parse args ─────────────────────────────────────────────────────────────────
# Results:
#   IMAGE_PATH     the single positional argument
#   DRY_RUN        1 when --dry-run was given
#   CUSTOM_PROMPT  value of --prompt (empty means "use DEFAULT_PROMPT")
#   GLM_MODEL      overridden by --model
IMAGE_PATH=""
DRY_RUN=0
CUSTOM_PROMPT=""

while [[ $# -gt 0 ]]; do
  case "$1" in
    --dry-run)
      DRY_RUN=1
      shift
      ;;
    --prompt)
      # Without the guard, a trailing `--prompt` trips `set -u` on "$2".
      [[ $# -ge 2 ]] || die "--prompt requires a value"
      CUSTOM_PROMPT="$2"
      shift 2
      ;;
    --model)
      [[ $# -ge 2 ]] || die "--model requires a value"
      GLM_MODEL="$2"
      shift 2
      ;;
    --help|-h)
      # Print the header comment of this script with the leading "# " removed.
      sed -n '2,15p' "$0" | sed 's/^# \{0,1\}//'
      exit 0
      ;;
    -*)
      die "Unknown option: $1"
      ;;
    *)
      if [[ -n "$IMAGE_PATH" ]]; then
        die "Only one image file at a time"
      fi
      IMAGE_PATH="$1"
      shift
      ;;
  esac
done

[[ -z "$IMAGE_PATH" ]] && die "Usage: $0 <image-file> [--dry-run] [--prompt \"...\"]"
[[ -f "$IMAGE_PATH" ]] || die "File not found: $IMAGE_PATH"

PROMPT="${CUSTOM_PROMPT:-$DEFAULT_PROMPT}"
FILENAME=$(basename "$IMAGE_PATH")
# `realpath` is not present on every system; fall back to resolving the
# containing directory, which is enough to get an absolute path.
if command -v realpath > /dev/null 2>&1; then
  ABS_PATH=$(realpath "$IMAGE_PATH")
else
  ABS_PATH="$(cd "$(dirname "$IMAGE_PATH")" && pwd -P)/${FILENAME}"
fi
|
||||
|
||||
# ── Check runtime ───────────────────────────────────────────────────────────────
# Install mlx-vlm on demand. `python3 -m pip` is used instead of bare `pip` so
# the package lands in the same interpreter that runs the import check here and
# mlx_vlm.generate later; a bare `pip` on PATH may belong to a different Python.
if ! python3 -c "import mlx_vlm" 2>/dev/null; then
  warn "mlx-vlm not installed. Installing now..."
  python3 -m pip install -q -U mlx-vlm || die "pip install mlx-vlm failed — run manually: pip install -U mlx-vlm"
fi
|
||||
|
||||
# ── Run GLM-OCR ─────────────────────────────────────────────────────────────────
log "Running GLM-OCR on: $FILENAME"
log "Model: $GLM_MODEL"
if (( DRY_RUN == 1 )); then
  warn "Dry-run mode — will not post to Neuron"
fi

# GLM-OCR output goes to stdout; capture it (stderr is discarded).
# First run downloads ~1.59 GB — this is expected and cached thereafter.
ocr_cmd=(
  python3 -m mlx_vlm.generate
  --model "$GLM_MODEL"
  --max-tokens "$MAX_TOKENS"
  --temperature 0.0
  --prompt "$PROMPT"
  --image "$ABS_PATH"
)
if ! OCR_TEXT=$("${ocr_cmd[@]}" 2>/dev/null); then
  die "GLM-OCR failed. Check that mlx-vlm is installed and the image is readable."
fi

CHAR_COUNT=${#OCR_TEXT}
log "OCR complete — extracted ${CHAR_COUNT} characters"

# Fewer than 5 characters is reported, but the run continues.
if (( CHAR_COUNT < 5 )); then
  warn "Very short output — the image may be blank or unreadable"
fi

# ── Preview ─────────────────────────────────────────────────────────────────────
# Show at most the first 400 characters, plus a note on how much was cut.
printf "\n%s--- OCR output preview (first 400 chars) ---%s\n" "$BOLD" "$RESET"
printf "%s\n" "${OCR_TEXT:0:400}"
if (( CHAR_COUNT > 400 )); then
  printf "%s... [+%d more chars]%s\n" "$YELLOW" $((CHAR_COUNT - 400)) "$RESET"
fi
printf "\n"
|
||||
|
||||
# ── Post to Neuron soul ─────────────────────────────────────────────────────────
# POST {content, label, tags} as JSON to ${SOUL_URL}/api/neuron/memory and
# report the id of the created node. Dry-run stops before any network call.
if [[ "$DRY_RUN" -eq 1 ]]; then
  ok "Dry-run complete — would POST ${CHAR_COUNT} chars to ${SOUL_URL}/api/neuron/memory"
  exit 0
fi

log "Posting to Neuron soul at ${SOUL_URL} ..."

# Build the JSON body with Python so the OCR text is escaped correctly.
PAYLOAD=$(python3 -c '
import json, sys
content = sys.argv[1]
label = sys.argv[2]
tags = ["photo-import", "ocr", "glm-ocr"]
print(json.dumps({"content": content, "label": label, "tags": tags}))
' "$OCR_TEXT" "Photo: ${FILENAME}")

# A private temp file replaces the fixed /tmp/photo-to-memory-response.json,
# which was predictable and shared between concurrent runs.
RESPONSE_FILE="$(mktemp)"
trap 'rm -f "$RESPONSE_FILE"' EXIT

# The body is fed on stdin (`-d @-`) so it is not passed as a command-line
# argument. `|| true` keeps a connection failure from killing the script
# silently under `set -e`; curl reports it as status 000, handled below.
HTTP_STATUS=$(curl -s -o "$RESPONSE_FILE" -w "%{http_code}" \
  -X POST "${SOUL_URL}/api/neuron/memory" \
  -H "Content-Type: application/json" \
  -d @- <<< "$PAYLOAD" || true)

if [[ -z "$HTTP_STATUS" || "$HTTP_STATUS" == "000" ]]; then
  die "Could not reach Neuron soul at ${SOUL_URL} — is the service running?"
fi

if [[ "$HTTP_STATUS" =~ ^2 ]]; then
  # The response is expected to carry the new node under "id" or "node_id";
  # anything else (including unparsable JSON) is shown as "unknown".
  NODE_ID=$(python3 -c '
import json, sys
try:
    with open(sys.argv[1]) as fh:
        d = json.load(fh)
    print(d.get("id", d.get("node_id", "unknown")))
except Exception:
    print("unknown")
' "$RESPONSE_FILE")
  ok "Memory node created: ${NODE_ID}"
  ok "Label: Photo: ${FILENAME}"
  ok "Tags: photo-import, ocr, glm-ocr"
else
  BODY=$(cat "$RESPONSE_FILE" 2>/dev/null || echo "(no body)")
  die "Soul returned HTTP ${HTTP_STATUS}: ${BODY}"
fi
|
||||
Reference in New Issue
Block a user