Merge PR #66: soul: inject current engine into system prompt for truthful self-report

Adds current_engine_note() to chat.el and appends its result to the system prompt in handle_chat. This allows Neuron to answer 'what model am I running on?' accurately — the model id from the request body (or the configured default) is passed as a factual annotation rather than expecting the LLM to guess from training data.
2026-07-01 11:34:34 -05:00
parent 975bf2721b b24f6d645b
commit d67f4c8f08
1 changed files with 21 additions and 1 deletions
@@ -640,6 +640,21 @@ fn json_safe(s: String) -> String {
    return s4
 }

+// current_engine_note — returns a short, FACTUAL note for the system prompt so Neuron can answer
+// "what model/LLM are you running on?" truthfully; returns "" when `model` is empty. An LLM cannot
+// know its own model from training (the name is assigned AFTER training), so the harness tells it.
+// ADDITIVE — the model is the ENGINE beneath the self; identity, values, memory, and the safety layer
+// are untouched. NOTE(review): `model` is embedded verbatim — confirm callers pass a trusted id.
+fn current_engine_note(model: String) -> String {
+    if str_eq(model, "") {
+        return ""
+    }
+    return "\n\n[CURRENT ENGINE: this turn is generated by the underlying model \"" + model
+        + "\". It is the engine beneath your self — your identity, values, and memory are layered on"
+        + " top of it. If the user asks which model or LLM you are running on, answer with this model"
+        + " id plainly and truthfully; never guess a different one.]"
+}
+
 // build_system_prompt — assemble the system prompt for a chat turn.
 // chat_mode: Bool — pass true from handle_chat (no tools), false from agentic paths.
 // Issue #9 fix: no_tools_rule only included when chat_mode=true.
@@ -997,7 +1012,12 @@ fn handle_chat(body: String) -> String {
    }

    let ctx: String = engram_compile(activation_seed)
-    let system: String = affective_prefix + build_system_prompt(ctx, true)
+    // Tell the LLM which engine it is running on this turn, so it can answer truthfully instead of
+    // guessing. The per-turn model rides in the request body (concrete even under Auto routing);
+    // fall back to the configured default when blank.
+    let sp_req_model: String = json_get(body, "model")
+    let sp_model: String = if str_eq(sp_req_model, "") { chat_default_model() } else { sp_req_model }
+    let system: String = affective_prefix + build_system_prompt(ctx, true) + current_engine_note(sp_model)

    let seen_ids: String = state_get("engram_compile_seen_ids")