diff --git a/chat.el b/chat.el index fd8df3e..f1cf363 100644 --- a/chat.el +++ b/chat.el @@ -587,9 +587,13 @@ fn handle_chat(body: String) -> String { let raw_response: String = llm_call_system(model, full_system, message) + // Issue #5: also catch empty string — llm_extract_text() in el_runtime.c silently + // returns "" when the response content array is missing or all blocks fail to parse. + // Without this guard an empty reply passes through as a silent empty response. let is_error: Bool = str_starts_with(raw_response, "{\"error\"") || str_starts_with(raw_response, "{\"type\":\"error\"") || str_contains(raw_response, "authentication_error") + || str_eq(raw_response, "") if is_error { // Issue #6: LLM failure — HTTP 503 (service unavailable). return "{\"__status__\":503,\"error\":\"llm unavailable\",\"response\":\"\"}" @@ -662,6 +666,42 @@ fn studio_tools_json() -> String { "]" } +// --------------------------------------------------------------------------- +// LLM reliability — issues that require C runtime (el_runtime.c) or deployment +// fixes. These cannot be addressed at the EL layer; they are documented here so the +// symptoms are traceable back to their root causes. +// +// Issue #1 (no retry on timeout/connection error): +// http_do() in el_runtime.c calls curl_easy_perform() once. On +// CURLE_OPERATION_TIMEDOUT / CURLE_COULDNT_CONNECT / CURLE_RECV_ERROR it +// returns http_error_json() with no retry. Fix: add a retry loop (max 3 +// attempts, exponential back-off starting at 1s) inside llm_provider_request(). +// +// Issue #2 (60s timeout applies to all HTTP calls including LLM): +// EL_HTTP_TIMEOUT_MS defaults to 60000ms for every http_do() call. +// Fix: introduce EL_LLM_TIMEOUT_MS (default 120000) used only by +// llm_provider_request(); lower the EL_HTTP_TIMEOUT_MS default to 30000 for +// general service calls to avoid holding connections for 60s. 
+// +// Issue #3 (HTTP 429 causes silent provider failover, not backoff): +// llm_chain_call() advances to the next provider on any JSON-prefixed response +// including 429. Fix: parse HTTP status via curl_easy_getinfo; on 429 sleep +// Retry-After seconds (default 5s) then retry the same provider up to 3 times. +// +// Issue #4 (HTTP 500/502 fails over silently with no retry): +// Same path as #3 — 5xx responses cause immediate provider failover with no +// retry. Fix: retry with exponential back-off (1s, 2s, 4s) before advancing. +// +// Issue #6 (no secondary LLM fallback in production): +// Fix (config only): set NEURON_LLM_1_URL/KEY/FORMAT in ExternalSecret to a secondary provider +// (e.g. Gemini). No C code change required; llm_chain_call() already iterates. +// +// Issue #8 (LLM response size unbounded — limited only by available memory): +// HttpBuf grows via realloc() with no hard limit. Fix: add +// EL_HTTP_MAX_RESPONSE_BYTES (default 10MiB) cap in httpbuf_append() and +// return http_error_json("response too large") on overflow. +// --------------------------------------------------------------------------- + fn agentic_api_key() -> String { let k1: String = env("ANTHROPIC_API_KEY") if !str_eq(k1, "") { @@ -713,7 +753,7 @@ fn agentic_tools_with_web() -> String { // Short timeout + empty-array fallback: if the bridge is down, the soul runs // exactly as before with only its built-in tools (graceful degradation). 
fn connector_tools_json() -> String { - let raw: String = exec_capture("curl -s --max-time 2 http://127.0.0.1:7771/mcp/tools") + let raw: String = exec_capture("curl -s --max-time 5 http://127.0.0.1:7771/mcp/tools") if str_eq(raw, "") { return "[]" } @@ -766,7 +806,7 @@ fn tool_auto_approved(tool_name: String) -> Bool { if !str_starts_with(tool_name, "mcp__") { return false } - let raw: String = exec_capture("curl -s --max-time 2 http://127.0.0.1:7771/mcp/auto-approved") + let raw: String = exec_capture("curl -s --max-time 5 http://127.0.0.1:7771/mcp/auto-approved") if str_eq(raw, "") { return false } @@ -1189,6 +1229,14 @@ fn agentic_loop(session_id: String, model: String, safe_sys: String, tools_json: let iteration: Int = 0 let keep_going: Bool = true + // Issue #9: agentic max_tokens configurable via NEURON_LLM_MAX_TOKENS env var. + // Default 4096 is marginal for long tool chains (8 iterations x 4096 tokens). + // Set to 8192+ for complex multi-step tasks. Must be a bare integer: it is spliced unquoted into the request JSON. + // Note: llm_provider_request() in el_runtime.c also hardcodes 4096 for the + // llm_call_system() (non-agentic) path; that requires a C runtime change. + let max_tokens_env: String = env("NEURON_LLM_MAX_TOKENS") + let max_tokens_str: String = if str_eq(max_tokens_env, "") { "4096" } else { max_tokens_env } + // Suspension state — captured at top level so it escapes the while body. let pending: Bool = false let pend_tool_id: String = "" @@ -1197,7 +1245,7 @@ fn agentic_loop(session_id: String, model: String, safe_sys: String, tools_json: while keep_going && iteration < 8 { let req_body: String = "{\"model\":\"" + model + "\"" - + ",\"max_tokens\":4096" + + ",\"max_tokens\":" + max_tokens_str + ",\"system\":\"" + safe_sys + "\"" + ",\"tools\":" + tools_json + ",\"messages\":" + messages @@ -1477,9 +1525,11 @@ fn handle_chat_as_soul(body: String) -> String { let raw_response: String = llm_call_system(model, system_prompt, eff_message) + // Issue #5: empty string catch — same rationale as handle_chat. 
let is_error: Bool = str_starts_with(raw_response, "{\"error\"") || str_starts_with(raw_response, "{\"type\":\"error\"") || str_contains(raw_response, "authentication_error") + || str_eq(raw_response, "") if is_error { return "{\"error\":\"llm unavailable\",\"response\":\"\",\"speaker_slug\":\"" + speaker + "\",\"model\":\"" + model + "\"}" } @@ -1526,9 +1576,11 @@ fn handle_dharma_room_turn(body: String) -> String { let raw_response: String = llm_call_system(model, system_prompt, transcript) + // Issue #5: empty string catch — same rationale as handle_chat. let is_error: Bool = str_starts_with(raw_response, "{\"error\"") || str_starts_with(raw_response, "{\"type\":\"error\"") || str_contains(raw_response, "authentication_error") + || str_eq(raw_response, "") if is_error { return "{\"error\":\"llm unavailable\",\"response\":\"\",\"cgi_id\":\"" + cgi_id + "\"}" }