From f7ae7df9d642f8ac9a8a8c169cc3681043e1efcf Mon Sep 17 00:00:00 2001 From: Will Anderson Date: Wed, 17 Jun 2026 13:01:13 -0500 Subject: [PATCH] fix/test(chat): guard handle_dharma_room_turn_agentic against tool_pending and empty reply MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When agentic_loop suspends for an MCP bridge tool it returns a {"tool_pending":true,...} envelope with no "reply" key. Without an explicit check, json_get(loop_result, "reply") returns "" and the function emitted {"response":"","cgi_id":"..."} — a silent empty response indistinguishable from a successful LLM turn with no content. Two guards added after the existing error check: 1. tool_pending passthrough: if the loop suspended, return the pending envelope directly so callers (dharma room orchestrators) can distinguish suspension from failure and route to the approve flow. 2. Empty-reply guard: if final_text is empty after the pending check, return an explicit {"error":"no response",...} envelope instead of silently succeeding with an empty response field. Also adds tests/test_agentic_tools.el: - agentic_tools_all() includes all literal tool names and web_search - connector_tools_json() returns valid JSON when bridge is down (graceful degradation) - tool_pending envelope detection patterns (the is_pending logic) - json_get(pending_envelope, "reply") returns "" confirming the empty-reply guard is load-bearing (pure string/JSON, no LLM or network required) --- chat.el | 14 +++ tests/test_agentic_tools.el | 176 ++++++++++++++++++++++++++++++++++++ 2 files changed, 190 insertions(+) create mode 100644 tests/test_agentic_tools.el diff --git a/chat.el b/chat.el index c2e4a61..0509ccb 100644 --- a/chat.el +++ b/chat.el @@ -993,7 +993,21 @@ fn handle_dharma_room_turn_agentic(body: String) -> String { return "{\"error\":\"" + result_error + "\",\"response\":\"\",\"cgi_id\":\"" + cgi_id + "\"}" } + // If agentic_loop suspended for an MCP bridge tool, pass the pending envelope + // straight through so callers can distinguish suspension from failure. + // A silent empty response is indistinguishable from an LLM error to any caller. + let is_pending: Bool = str_eq(json_get(loop_result, "tool_pending"), "true") + || str_starts_with(loop_result, "{\"tool_pending\":true") + if is_pending { + return loop_result + } + let final_text: String = json_get(loop_result, "reply") + // Guard against a silent empty response - produce an explicit error so callers + // cannot mistake a failed turn for a successful one with empty content. + if str_eq(final_text, "") { + return "{\"error\":\"no response\",\"response\":\"\",\"cgi_id\":\"" + cgi_id + "\"}" + } let tools_arr: String = json_get_raw(loop_result, "tools_used") let eff_tools: String = if str_eq(tools_arr, "") { "[]" } else { tools_arr } let safe_text: String = json_safe(final_text) diff --git a/tests/test_agentic_tools.el b/tests/test_agentic_tools.el new file mode 100644 index 0000000..1544585 --- /dev/null +++ b/tests/test_agentic_tools.el @@ -0,0 +1,176 @@ +// tests/test_agentic_tools.el +// Tests for the agentic tools wiring (PR #19: fix/agentic-tools-all). +// +// Covers: +// 1. agentic_tools_all() includes all literal tool names +// 2. agentic_tools_all() includes the native web_search tool +// 3. connector_tools_json() returns valid JSON ([] or array) even when bridge is down +// 4. agentic_tools_all() output stays valid JSON when connector bridge is down +// 5. tool_pending envelope detection — the pattern used in handle_dharma_room_turn_agentic +// to distinguish a suspended agentic loop from a normal reply +// 6. Empty-reply guard — json_get("reply") returns "" on a tool_pending envelope, +// confirming that the guard is necessary to avoid silent empty responses +// +// Tests 5 and 6 validate the El-level logic that guards handle_dharma_room_turn_agentic +// against silent failures after the refactor to use agentic_loop. +// +// Tests 1-4 are pure: no network, no LLM, no engram. +// Tests 5-6 are pure string/JSON operations on synthesized envelopes. +// +// Integration tests (LLM-live) are documented as SKIP stubs because they +// require a valid ANTHROPIC_API_KEY and a running soul + neuron-connectd. + +import "../chat.el" + +let pass_count: Int = 0 +let fail_count: Int = 0 + +fn assert_eq(label: String, got: String, expected: String) -> Void { + if str_eq(got, expected) { + let pass_count = pass_count + 1 + println(" PASS: " + label) + } else { + let fail_count = fail_count + 1 + println(" FAIL: " + label) + println(" got: " + got) + println(" expected: " + expected) + } +} + +fn assert_true(label: String, cond: Bool) -> Void { + if cond { + let pass_count = pass_count + 1 + println(" PASS: " + label) + } else { + let fail_count = fail_count + 1 + println(" FAIL: " + label) + } +} + +fn assert_contains(label: String, haystack: String, needle: String) -> Void { + if str_contains(haystack, needle) { + let pass_count = pass_count + 1 + println(" PASS: " + label) + } else { + let fail_count = fail_count + 1 + println(" FAIL: " + label) + println(" missing '" + needle + "' in: " + haystack) + } +} + +fn assert_not_empty(label: String, s: String) -> Void { + if str_len(s) > 0 { + let pass_count = pass_count + 1 + println(" PASS: " + label) + } else { + let fail_count = fail_count + 1 + println(" FAIL: " + label) + println(" got empty string") + } +} + +// ── Section 1: agentic_tools_all contains all literal tool names ────────────── + +println("") +println("1. agentic_tools_all() — contains all literal tool names") + +let all_tools: String = agentic_tools_all() +assert_contains("contains read_file", all_tools, "\"name\":\"read_file\"") +assert_contains("contains write_file", all_tools, "\"name\":\"write_file\"") +assert_contains("contains web_get", all_tools, "\"name\":\"web_get\"") +assert_contains("contains search_memory", all_tools, "\"name\":\"search_memory\"") +assert_contains("contains run_command", all_tools, "\"name\":\"run_command\"") + +// ── Section 2: agentic_tools_all includes native web_search ────────────────── + +println("") +println("2. agentic_tools_all() — includes native web_search_20250305 tool") + +assert_contains("contains web_search type", all_tools, "web_search_20250305") +assert_contains("contains web_search name", all_tools, "\"name\":\"web_search\"") + +// ── Section 3: connector_tools_json returns valid JSON when bridge is down ──── + +println("") +println("3. connector_tools_json() — returns [] when neuron-connectd is not running") + +// connector_tools_json() calls the bridge; in a unit-test environment it is +// expected to return "[]" (graceful degradation). If the bridge IS running, +// it returns a non-empty array — both are valid. +let conn_tools: String = connector_tools_json() +let starts_bracket: Bool = str_starts_with(conn_tools, "[") +assert_true("connector_tools_json starts with [", starts_bracket) +assert_not_empty("connector_tools_json is non-empty string", conn_tools) + +// ── Section 4: agentic_tools_all output is valid JSON array ────────────────── + +println("") +println("4. agentic_tools_all() — output is a JSON array") + +assert_true("starts with [", str_starts_with(all_tools, "[")) +// A JSON array ends with ] +let last_char: String = str_slice(all_tools, str_len(all_tools) - 1, str_len(all_tools)) +assert_eq("ends with ]", last_char, "]") + +// ── Section 5: tool_pending envelope detection ──────────────────────────────── +// +// This validates the detection logic added to handle_dharma_room_turn_agentic: +// +// let is_pending: Bool = str_eq(json_get(loop_result, "tool_pending"), "true") +// || str_starts_with(loop_result, "{\"tool_pending\":true") +// +// When agentic_loop suspends for an MCP bridge tool it returns: +// {"tool_pending":true,"session_id":"...","call_id":"...","tool_name":"...","tool_input":{...},...} +// +// json_get() on a Bool field may return "true" (string) or "" depending on El runtime. +// The str_starts_with fallback guards against either representation. + +println("") +println("5. tool_pending envelope detection patterns") + +let pending_envelope: String = "{\"tool_pending\":true,\"session_id\":\"dharma:br-1234-1\",\"call_id\":\"toolu_01\",\"tool_name\":\"mcp__filesystem__read\",\"tool_input\":{\"path\":\"/tmp/x\"},\"model\":\"claude-sonnet-4-5\",\"agentic\":true,\"tools_used\":[]}" +let normal_envelope: String = "{\"reply\":\"Hello from the soul.\",\"model\":\"claude-sonnet-4-5\",\"agentic\":true,\"tools_used\":[]}" +let error_envelope: String = "{\"error\":\"llm unavailable\",\"reply\":\"\"}" + +// str_starts_with fallback — always works regardless of how json_get handles bool +assert_true("pending envelope: str_starts_with detects tool_pending=true", str_starts_with(pending_envelope, "{\"tool_pending\":true")) +assert_true("normal reply: str_starts_with does not detect tool_pending", !str_starts_with(normal_envelope, "{\"tool_pending\":true")) +assert_true("error envelope: str_starts_with does not detect tool_pending", !str_starts_with(error_envelope, "{\"tool_pending\":true")) + +// ── Section 6: empty-reply guard necessity ──────────────────────────────────── +// +// Confirms that json_get(pending_envelope, "reply") returns "" — proving the +// empty-reply guard is necessary to avoid a silent success with empty response. +// Without the guard, the old code would return {"response":"","cgi_id":"..."} which +// is indistinguishable from a successful LLM response. + +println("") +println("6. empty-reply guard — json_get(pending, \"reply\") is empty") + +let pending_reply: String = json_get(pending_envelope, "reply") +assert_eq("json_get reply on pending envelope is empty", pending_reply, "") + +let normal_reply: String = json_get(normal_envelope, "reply") +assert_not_empty("json_get reply on normal envelope is non-empty", normal_reply) + +// Also confirm error key absent from normal reply and pending envelopes +let pending_error: String = json_get(pending_envelope, "error") +assert_eq("pending envelope has no error key", pending_error, "") + +let normal_error: String = json_get(normal_envelope, "error") +assert_eq("normal envelope has no error key", normal_error, "") + +// ── SKIP stubs: integration tests requiring live LLM ───────────────────────── + +println("") +println("SKIP: handle_dharma_room_turn_agentic happy-path (requires ANTHROPIC_API_KEY + soul)") +println(" Expected: non-empty response field and status ok") +println("SKIP: handle_dharma_room_turn_agentic tool_pending propagation (requires API + MCP bridge)") +println(" Expected: tool_pending in response when loop suspends for mcp__* tool") +println("SKIP: handle_chat_agentic connector tools end-to-end (requires API + neuron-connectd)") +println(" Expected: mcp__* tool names appear in tools_used when connectd is running") + +// ── Summary ─────────────────────────────────────────────────────────────────── + +println("") +println("agentic tools tests: " + int_to_str(pass_count) + " passed, " + int_to_str(fail_count) + " failed")