From 2b8062c55f812c25958325194f504ec93af9631c Mon Sep 17 00:00:00 2001 From: Tim Lingo Date: Tue, 9 Jun 2026 08:02:46 -0500 Subject: [PATCH 1/2] fix(runtime): handle multi-byte UTF-8 in JSON string escaping Validate UTF-8 continuation bytes in jb_emit_escaped; pass valid sequences through and escape orphaned/invalid start bytes as \u00xx. Pre-existing change found uncommitted in the working tree; committed here so it is reviewable rather than lost. Co-Authored-By: Claude Opus 4.8 (1M context) --- lang/el-compiler/runtime/el_runtime.c | 46 +++++++++++++++++++++------ 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/lang/el-compiler/runtime/el_runtime.c b/lang/el-compiler/runtime/el_runtime.c index df83e95..f544846 100644 --- a/lang/el-compiler/runtime/el_runtime.c +++ b/lang/el-compiler/runtime/el_runtime.c @@ -3135,23 +3135,49 @@ static void jb_puts(JsonBuf* b, const char* s) { static void jb_emit_escaped(JsonBuf* b, const char* s) { jb_putc(b, '"'); - for (; *s; s++) { - unsigned char c = (unsigned char)*s; + const unsigned char* p = (const unsigned char*)s; + while (*p) { + unsigned char c = *p; switch (c) { - case '"': jb_puts(b, "\\\""); break; - case '\\': jb_puts(b, "\\\\"); break; - case '\b': jb_puts(b, "\\b"); break; - case '\f': jb_puts(b, "\\f"); break; - case '\n': jb_puts(b, "\\n"); break; - case '\r': jb_puts(b, "\\r"); break; - case '\t': jb_puts(b, "\\t"); break; + case '"': jb_puts(b, "\\\""); p++; break; + case '\\': jb_puts(b, "\\\\"); p++; break; + case '\b': jb_puts(b, "\\b"); p++; break; + case '\f': jb_puts(b, "\\f"); p++; break; + case '\n': jb_puts(b, "\\n"); p++; break; + case '\r': jb_puts(b, "\\r"); p++; break; + case '\t': jb_puts(b, "\\t"); p++; break; default: if (c < 0x20) { char tmp[8]; snprintf(tmp, sizeof(tmp), "\\u%04x", c); jb_puts(b, tmp); - } else { + p++; + } else if (c < 0x80) { jb_putc(b, (char)c); + p++; + } else { + /* Multi-byte UTF-8: validate sequence, pass through if valid, + * escape as \u00xx if the start byte is invalid/orphaned. */ + int seq_len = 0; + if ((c & 0xE0) == 0xC0) seq_len = 2; + else if ((c & 0xF0) == 0xE0) seq_len = 3; + else if ((c & 0xF8) == 0xF0) seq_len = 4; + if (seq_len >= 2) { + int valid = 1; + for (int i = 1; i < seq_len; i++) { + if ((p[i] & 0xC0) != 0x80) { valid = 0; break; } + } + if (valid) { + for (int i = 0; i < seq_len; i++) jb_putc(b, (char)p[i]); + p += seq_len; + break; + } + } + /* Invalid start byte or truncated sequence — escape it */ + char tmp[8]; + snprintf(tmp, sizeof(tmp), "\\u%04x", c); + jb_puts(b, tmp); + p++; } break; } -- 2.52.0 From dbf2c659d9bf0384bbf1a3008565de47aa5b064f Mon Sep 17 00:00:00 2001 From: Tim Lingo Date: Tue, 9 Jun 2026 08:03:56 -0500 Subject: [PATCH 2/2] fix(runtime): pass model through to the LLM API instead of dropping it llm_call_system / llm_call accepted a model argument and discarded it: they called llm_chain_call(system, user) with no model, and the legacy ANTHROPIC_API_KEY fallback passed NULL to llm_provider_request, so every non-agentic chat was pinned to LLM_DEFAULT_MODEL (claude-sonnet-4-5) regardless of the caller's selection. Thread model_pref through llm_chain_call: provider-chain entries still honor their own NEURON_LLM_N_MODEL override and fall back to the requested model otherwise; the legacy Anthropic path now uses the requested model. NULL/empty preserves prior default behavior. Effect: the soul's model selection (state soul_model / SOUL_LLM_MODEL, e.g. claude-opus-4-8) now reaches api.anthropic.com. Previously the chat response echoed the selected model in its label while the request billed Sonnet 4.5. Not built locally (no elc/cc toolchain on this checkout); needs stage CI. Co-Authored-By: Claude Opus 4.8 (1M context) --- lang/el-compiler/runtime/el_runtime.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/lang/el-compiler/runtime/el_runtime.c b/lang/el-compiler/runtime/el_runtime.c index f544846..7d47e17 100644 --- a/lang/el-compiler/runtime/el_runtime.c +++ b/lang/el-compiler/runtime/el_runtime.c @@ -8473,7 +8473,7 @@ static el_val_t llm_provider_request(const char* url, const char* key, } } -static el_val_t llm_chain_call(const char* system_str, const char* user_str) { +static el_val_t llm_chain_call(const char* model_pref, const char* system_str, const char* user_str) { char url_key[64], key_key[64], fmt_key[64], model_key[64]; for (int i = 0; i < LLM_MAX_PROVIDERS; i++) { snprintf(url_key, sizeof(url_key), "NEURON_LLM_%d_URL", i); @@ -8486,6 +8486,7 @@ static el_val_t llm_chain_call(const char* system_str, const char* user_str) { const char* fmt_s = getenv(fmt_key); int fmt = (fmt_s && strcmp(fmt_s, "anthropic") == 0) ? 1 : 0; const char* model = getenv(model_key); + if (!model || !*model) model = model_pref; /* fall back to the caller-requested model */ fprintf(stderr, "[llm] trying provider %d (%s)\n", i, url); el_val_t result = llm_provider_request(url, key, fmt, model, system_str, user_str); const char* t = EL_CSTR(result); @@ -8496,7 +8497,7 @@ static el_val_t llm_chain_call(const char* system_str, const char* user_str) { const char* api_key = getenv("ANTHROPIC_API_KEY"); if (!api_key || !*api_key) return http_error_json("no LLM providers configured"); fprintf(stderr, "[llm] using legacy ANTHROPIC_API_KEY fallback\n"); - return llm_provider_request(LLM_API_URL, api_key, 1, NULL, system_str, user_str); + return llm_provider_request(LLM_API_URL, api_key, 1, model_pref, system_str, user_str); } /* Legacy llm_request — kept for backward compat with agentic loop internals */ @@ -8560,14 +8561,16 @@ static el_val_t llm_extract_text(el_val_t resp_val) { } el_val_t llm_call(el_val_t model, el_val_t prompt) { + const char* m = EL_CSTR(model); const char* u = EL_CSTR(prompt); if (!u) u = ""; - return llm_chain_call(NULL, u); + return llm_chain_call(m, NULL, u); } el_val_t llm_call_system(el_val_t model, el_val_t system_prompt, el_val_t user_prompt) { + const char* m = EL_CSTR(model); const char* s = EL_CSTR(system_prompt); if (!s) s = ""; const char* u = EL_CSTR(user_prompt); if (!u) u = ""; - return llm_chain_call(s, u); + return llm_chain_call(m, s, u); } /* ── Tool registry for llm_call_agentic ─────────────────────────────────── */ -- 2.52.0