soul: OpenAI-compatible provider path for chat (Ollama/OpenAI/Grok/Gemini) — v1 basic completion

Additive, Anthropic path untouched + default. When NEURON_LLM_0_FORMAT=openai and NEURON_LLM_0_URL set, basic chat turns build an OpenAI chat/completions request and parse choices[0].message.content. v1 = plain completion, NO tools/agentic loop yet (follow-up). Unblocks all OpenAI-format providers at once. PARSES (elc chat.el exit 0); NOT yet built/tested — needs the soul rebuild (dist/soul.c) + E2E. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-30 18:52:26 -05:00
1 changed file with 88 additions and 1 deletion
@@ -1202,6 +1202,86 @@ fn agentic_api_key() -> String {
    return env("NEURON_LLM_0_KEY")
 }

+// ── OpenAI-compatible providers (Ollama / OpenAI / Grok / Gemini) ──────────────────────────────
+// The brain speaks Anthropic's Messages format by default. When the active provider uses the
+// OpenAI-compatible wire format (NEURON_LLM_0_FORMAT=openai) with a configured base URL
+// (NEURON_LLM_0_URL, e.g. http://localhost:11434/v1 for local Ollama), basic chat turns are served
+// here instead of the Anthropic agentic loop.
+// v1 SCOPE: plain chat completion only — NO tools / agentic loop yet (that is a follow-up port).
+// This block is ADDITIVE: the Anthropic path is untouched and stays the default.
+
+// Base URL of the OpenAI-compatible endpoint, read from NEURON_LLM_0_URL on every call.
+// The value is returned exactly as configured (no trimming or validation happens here).
+fn llm_base_url() -> String {
+    let configured: String = env("NEURON_LLM_0_URL")
+    return configured
+}
+
+// Wire format of the active provider, taken from NEURON_LLM_0_FORMAT.
+// An empty value falls back to "anthropic"; any other value is returned verbatim.
+fn llm_wire_format() -> String {
+    let configured: String = env("NEURON_LLM_0_FORMAT")
+    let effective: String = if !str_eq(configured, "") {
+        configured
+    } else {
+        "anthropic"
+    }
+    return effective
+}
+
+// Escape a decoded string so it can be embedded back into a JSON string literal.
+// Escapes backslash, double quote, newline, carriage return and tab.
+// Order matters: backslashes are doubled FIRST so the backslashes introduced by the later
+// replacements are not themselves escaped a second time.
+// NOTE(review): the remaining control characters (U+0000–U+001F, e.g. \b and \f) are passed
+// through unchanged; add \uXXXX escaping here if a provider is ever seen emitting them.
+fn json_escape(s: String) -> String {
+    let slashes: String = str_replace(s, "\\", "\\\\")
+    let quotes: String = str_replace(slashes, "\"", "\\\"")
+    let newlines: String = str_replace(quotes, "\n", "\\n")
+    let returns: String = str_replace(newlines, "\r", "\\r")
+    // A raw tab inside a JSON string literal is invalid JSON, so it must be escaped as well.
+    let tabs: String = str_replace(returns, "\t", "\\t")
+    return tabs
+}
+
+// Build the chat-completions URL for an OpenAI-compatible base URL.
+// A single trailing "/" on [base_url] (e.g. "http://localhost:11434/v1/") is dropped so the
+// result never contains "//chat/completions". An empty [base_url] yields "/chat/completions",
+// the same string plain concatenation would produce.
+fn openai_completions_url(base_url: String) -> String {
+    if str_eq(base_url, "") {
+        return "/chat/completions"
+    }
+    let last_char: String = str_slice(base_url, str_len(base_url) - 1, str_len(base_url))
+    if str_eq(last_char, "/") {
+        return str_slice(base_url, 0, str_len(base_url) - 1) + "/chat/completions"
+    }
+    return base_url + "/chat/completions"
+}
+
+// Basic (non-agentic) chat completion against an OpenAI-compatible endpoint.
+//   model         — model name; JSON-escaped here before being embedded in the request.
+//   base_url      — provider base URL, with or without a trailing "/".
+//   api_key       — sent as a Bearer token; "" sends no Authorization header.
+//   safe_sys      — system prompt, already JSON-escaped by the caller.
+//   messages_json — the same JSON array the Anthropic path builds
+//                   (e.g. [{"role":"user","content":"..."}]).
+// Returns the soul's standard {"reply":"...","tools_used":[]} on success, or
+// {"error":"...","reply":""} when the response looks like an error or carries no choices.
+fn openai_chat_complete(model: String, base_url: String, api_key: String, safe_sys: String, messages_json: String) -> String {
+    // Prepend the system prompt as an OpenAI "system" message, then the existing turn array.
+    // [inner] is the array's contents with the surrounding [ ] stripped; it stays "" for an
+    // empty array so no dangling comma is emitted below.
+    let inner: String = if json_array_len(messages_json) > 0 {
+        str_slice(messages_json, 1, str_len(messages_json) - 1)
+    } else {
+        ""
+    }
+    let msgs: String = if str_eq(inner, "") {
+        "[{\"role\":\"system\",\"content\":\"" + safe_sys + "\"}]"
+    } else {
+        "[{\"role\":\"system\",\"content\":\"" + safe_sys + "\"}," + inner + "]"
+    }
+    // Escape the model name so a quote or backslash in it cannot break the request JSON.
+    // NOTE(review): assumes [model] arrives as a decoded string; ordinary model names are
+    // unchanged by escaping.
+    let req_body: String = "{\"model\":\"" + json_escape(model) + "\""
+        + ",\"max_tokens\":4096"
+        + ",\"messages\":" + msgs
+        + "}"
+
+    let h: Map = {}
+    map_set(h, "content-type", "application/json")
+    // Ollama needs no key; OpenAI / Grok / Gemini use a Bearer token.
+    if !str_eq(api_key, "") {
+        map_set(h, "Authorization", "Bearer " + api_key)
+    }
+
+    // Tolerate a base URL configured with a trailing slash.
+    let url: String = openai_completions_url(base_url)
+    let raw_resp: String = http_post_with_headers(url, req_body, h)
+
+    // A response that starts with {"error" or contains an "error": key anywhere is reported
+    // as a generic failure; the provider's own message is not forwarded.
+    let is_error: Bool = str_starts_with(raw_resp, "{\"error\"") || str_contains(raw_resp, "\"error\":")
+    if is_error {
+        return "{\"error\":\"llm unavailable\",\"reply\":\"\"}"
+    }
+
+    // Parse OpenAI response shape: choices[0].message.content
+    // A missing "choices" key is normalised to an empty array so it takes the same
+    // "empty response" exit as a present-but-empty one.
+    let choices: String = json_get_raw(raw_resp, "choices")
+    let eff_choices: String = if str_eq(choices, "") {
+        "[]"
+    } else {
+        choices
+    }
+    if json_array_len(eff_choices) < 1 {
+        return "{\"error\":\"empty response\",\"reply\":\"\"}"
+    }
+    let first: String = json_array_get(eff_choices, 0)
+    let message: String = json_get_raw(first, "message")
+    // NOTE(review): json_get presumably returns the decoded string value, which is why the
+    // content is re-escaped before being embedded in the reply JSON — confirm.
+    let content: String = json_get(message, "content")
+    return "{\"reply\":\"" + json_escape(content) + "\",\"tools_used\":[]}"
+}
+
 fn agentic_tools_literal() -> String {
    return "[" +
        "{\"name\":\"read_file\",\"description\":\"Read contents of a file from disk.\",\"input_schema\":{\"type\":\"object\",\"properties\":{\"path\":{\"type\":\"string\",\"description\":\"Absolute file path\"}},\"required\":[\"path\"]}}," +
@@ -1704,7 +1784,14 @@ fn handle_chat_agentic(body: String) -> String {

    // Use caller-supplied session_id if provided, otherwise generate a bridge id.
    let session_id: String = if str_eq(req_session, "") { next_bridge_id() } else { req_session }
-    let result: String = agentic_loop(session_id, model, safe_sys, tools_json, messages, h, "")
+    // Provider fork: OpenAI-compatible providers (Ollama/OpenAI/Grok/Gemini) take the plain-completion
+    // path (v1, no tools); everything else stays on the Anthropic agentic loop (the default).
+    let use_openai: Bool = !str_eq(llm_base_url(), "") && str_eq(llm_wire_format(), "openai")
+    let result: String = if use_openai {
+        openai_chat_complete(model, llm_base_url(), agentic_api_key(), safe_sys, messages)
+    } else {
+        agentic_loop(session_id, model, safe_sys, tools_json, messages, h, "")
+    }

    // Persist the exchange to session/global history for thread continuity on next turn.
    // Only save when the loop completed (reply present), not when tool_pending.