From f7ae7df9d642f8ac9a8a8c169cc3681043e1efcf Mon Sep 17 00:00:00 2001
From: Will Anderson <will.anderson@neurontechnologies.ai>
Date: Wed, 17 Jun 2026 13:01:13 -0500
Subject: [PATCH] fix/test(chat): guard handle_dharma_room_turn_agentic against
 tool_pending and empty reply
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When agentic_loop suspends for an MCP bridge tool it returns a
{"tool_pending":true,...} envelope with no "reply" key. Without an
explicit check, json_get(loop_result, "reply") returns "" and the
function emitted {"response":"","cgi_id":"..."} — a silent empty
response indistinguishable from a successful LLM turn with no content.

Two guards added after the existing error check:

1. tool_pending passthrough: if the loop suspended, return the pending
   envelope directly so callers (dharma room orchestrators) can
   distinguish suspension from failure and route to the approve flow.

2. Empty-reply guard: if final_text is empty after the pending check,
   return an explicit {"error":"no response",...} envelope instead of
   silently succeeding with an empty response field.

Also adds tests/test_agentic_tools.el:
- agentic_tools_all() includes all literal tool names and web_search
- connector_tools_json() returns valid JSON when bridge is down (graceful degradation)
- tool_pending envelope detection patterns (the is_pending logic)
- json_get(pending_envelope, "reply") returns "" confirming the empty-reply
  guard is load-bearing (pure string/JSON, no LLM or network required)
---
 chat.el                     |  14 +++
 tests/test_agentic_tools.el | 176 ++++++++++++++++++++++++++++++++++++
 2 files changed, 190 insertions(+)
 create mode 100644 tests/test_agentic_tools.el

diff --git a/chat.el b/chat.el
index c2e4a61..0509ccb 100644
--- a/chat.el
+++ b/chat.el
@@ -993,7 +993,21 @@ fn handle_dharma_room_turn_agentic(body: String) -> String {
         return "{\"error\":\"" + result_error + "\",\"response\":\"\",\"cgi_id\":\"" + cgi_id + "\"}"
     }
 
+    // If agentic_loop suspended for an MCP bridge tool, pass the pending envelope
+    // straight through so callers can distinguish suspension from failure.
+    // A silent empty response is indistinguishable from an LLM error to any caller.
+    let is_pending: Bool = str_eq(json_get(loop_result, "tool_pending"), "true")
+        || str_starts_with(loop_result, "{\"tool_pending\":true")
+    if is_pending {
+        return loop_result
+    }
+
     let final_text: String = json_get(loop_result, "reply")
+    // Guard against a silent empty response - produce an explicit error so callers
+    // cannot mistake a failed turn for a successful one with empty content.
+    if str_eq(final_text, "") {
+        return "{\"error\":\"no response\",\"response\":\"\",\"cgi_id\":\"" + cgi_id + "\"}"
+    }
     let tools_arr: String = json_get_raw(loop_result, "tools_used")
     let eff_tools: String = if str_eq(tools_arr, "") { "[]" } else { tools_arr }
     let safe_text: String = json_safe(final_text)
diff --git a/tests/test_agentic_tools.el b/tests/test_agentic_tools.el
new file mode 100644
index 0000000..1544585
--- /dev/null
+++ b/tests/test_agentic_tools.el
@@ -0,0 +1,176 @@
+// tests/test_agentic_tools.el
+// Tests for the agentic tools wiring (PR #19: fix/agentic-tools-all).
+//
+// Covers:
+//   1. agentic_tools_all() includes all literal tool names
+//   2. agentic_tools_all() includes the native web_search tool
+//   3. connector_tools_json() returns valid JSON ([] or array) even when bridge is down
+//   4. agentic_tools_all() output stays valid JSON when connector bridge is down
+//   5. tool_pending envelope detection — the pattern used in handle_dharma_room_turn_agentic
+//      to distinguish a suspended agentic loop from a normal reply
+//   6. Empty-reply guard — json_get("reply") returns "" on a tool_pending envelope,
+//      confirming that the guard is necessary to avoid silent empty responses
+//
+// Tests 5 and 6 validate the El-level logic that guards handle_dharma_room_turn_agentic
+// against silent failures after the refactor to use agentic_loop.
+//
+// Tests 1-4 are pure: no network, no LLM, no engram.
+// Tests 5-6 are pure string/JSON operations on synthesized envelopes.
+//
+// Integration tests (LLM-live) are documented as SKIP stubs because they
+// require a valid ANTHROPIC_API_KEY and a running soul + neuron-connectd.
+
+import "../chat.el"
+
+let pass_count: Int = 0
+let fail_count: Int = 0
+
+fn assert_eq(label: String, got: String, expected: String) -> Void {
+    if str_eq(got, expected) {
+        let pass_count = pass_count + 1
+        println("  PASS: " + label)
+    } else {
+        let fail_count = fail_count + 1
+        println("  FAIL: " + label)
+        println("    got:      " + got)
+        println("    expected: " + expected)
+    }
+}
+
+fn assert_true(label: String, cond: Bool) -> Void {
+    if cond {
+        let pass_count = pass_count + 1
+        println("  PASS: " + label)
+    } else {
+        let fail_count = fail_count + 1
+        println("  FAIL: " + label)
+    }
+}
+
+fn assert_contains(label: String, haystack: String, needle: String) -> Void {
+    if str_contains(haystack, needle) {
+        let pass_count = pass_count + 1
+        println("  PASS: " + label)
+    } else {
+        let fail_count = fail_count + 1
+        println("  FAIL: " + label)
+        println("    missing '" + needle + "' in: " + haystack)
+    }
+}
+
+fn assert_not_empty(label: String, s: String) -> Void {
+    if str_len(s) > 0 {
+        let pass_count = pass_count + 1
+        println("  PASS: " + label)
+    } else {
+        let fail_count = fail_count + 1
+        println("  FAIL: " + label)
+        println("    got empty string")
+    }
+}
+
+// ── Section 1: agentic_tools_all contains all literal tool names ──────────────
+
+println("")
+println("1. agentic_tools_all() — contains all literal tool names")
+
+let all_tools: String = agentic_tools_all()
+assert_contains("contains read_file", all_tools, "\"name\":\"read_file\"")
+assert_contains("contains write_file", all_tools, "\"name\":\"write_file\"")
+assert_contains("contains web_get", all_tools, "\"name\":\"web_get\"")
+assert_contains("contains search_memory", all_tools, "\"name\":\"search_memory\"")
+assert_contains("contains run_command", all_tools, "\"name\":\"run_command\"")
+
+// ── Section 2: agentic_tools_all includes native web_search ──────────────────
+
+println("")
+println("2. agentic_tools_all() — includes native web_search_20250305 tool")
+
+assert_contains("contains web_search type", all_tools, "web_search_20250305")
+assert_contains("contains web_search name", all_tools, "\"name\":\"web_search\"")
+
+// ── Section 3: connector_tools_json returns valid JSON when bridge is down ────
+
+println("")
+println("3. connector_tools_json() — returns [] when neuron-connectd is not running")
+
+// connector_tools_json() calls the bridge; in a unit-test environment it is
+// expected to return "[]" (graceful degradation). If the bridge IS running,
+// it returns a non-empty array — both are valid.
+let conn_tools: String = connector_tools_json()
+let starts_bracket: Bool = str_starts_with(conn_tools, "[")
+assert_true("connector_tools_json starts with [", starts_bracket)
+assert_not_empty("connector_tools_json is non-empty string", conn_tools)
+
+// ── Section 4: agentic_tools_all output is valid JSON array ──────────────────
+
+println("")
+println("4. agentic_tools_all() — output is a JSON array")
+
+assert_true("starts with [", str_starts_with(all_tools, "["))
+// A JSON array ends with ]
+let last_char: String = str_slice(all_tools, str_len(all_tools) - 1, str_len(all_tools))
+assert_eq("ends with ]", last_char, "]")
+
+// ── Section 5: tool_pending envelope detection ────────────────────────────────
+//
+// This validates the detection logic added to handle_dharma_room_turn_agentic:
+//
+//   let is_pending: Bool = str_eq(json_get(loop_result, "tool_pending"), "true")
+//       || str_starts_with(loop_result, "{\"tool_pending\":true")
+//
+// When agentic_loop suspends for an MCP bridge tool it returns:
+//   {"tool_pending":true,"session_id":"...","call_id":"...","tool_name":"...","tool_input":{...},...}
+//
+// json_get() on a Bool field may return "true" (string) or "" depending on El runtime.
+// The str_starts_with fallback guards against either representation.
+
+println("")
+println("5. tool_pending envelope detection patterns")
+
+let pending_envelope: String = "{\"tool_pending\":true,\"session_id\":\"dharma:br-1234-1\",\"call_id\":\"toolu_01\",\"tool_name\":\"mcp__filesystem__read\",\"tool_input\":{\"path\":\"/tmp/x\"},\"model\":\"claude-sonnet-4-5\",\"agentic\":true,\"tools_used\":[]}"
+let normal_envelope: String = "{\"reply\":\"Hello from the soul.\",\"model\":\"claude-sonnet-4-5\",\"agentic\":true,\"tools_used\":[]}"
+let error_envelope: String = "{\"error\":\"llm unavailable\",\"reply\":\"\"}"
+
+// str_starts_with fallback — always works regardless of how json_get handles bool
+assert_true("pending envelope: str_starts_with detects tool_pending=true", str_starts_with(pending_envelope, "{\"tool_pending\":true"))
+assert_true("normal reply: str_starts_with does not detect tool_pending", !str_starts_with(normal_envelope, "{\"tool_pending\":true"))
+assert_true("error envelope: str_starts_with does not detect tool_pending", !str_starts_with(error_envelope, "{\"tool_pending\":true"))
+
+// ── Section 6: empty-reply guard necessity ────────────────────────────────────
+//
+// Confirms that json_get(pending_envelope, "reply") returns "" — proving the
+// empty-reply guard is necessary to avoid a silent success with empty response.
+// Without the guard, the old code would return {"response":"","cgi_id":"..."} which
+// is indistinguishable from a successful LLM response.
+
+println("")
+println("6. empty-reply guard — json_get(pending, \"reply\") is empty")
+
+let pending_reply: String = json_get(pending_envelope, "reply")
+assert_eq("json_get reply on pending envelope is empty", pending_reply, "")
+
+let normal_reply: String = json_get(normal_envelope, "reply")
+assert_not_empty("json_get reply on normal envelope is non-empty", normal_reply)
+
+// Also confirm error key absent from normal reply and pending envelopes
+let pending_error: String = json_get(pending_envelope, "error")
+assert_eq("pending envelope has no error key", pending_error, "")
+
+let normal_error: String = json_get(normal_envelope, "error")
+assert_eq("normal envelope has no error key", normal_error, "")
+
+// ── SKIP stubs: integration tests requiring live LLM ─────────────────────────
+
+println("")
+println("SKIP: handle_dharma_room_turn_agentic happy-path (requires ANTHROPIC_API_KEY + soul)")
+println("      Expected: non-empty response field and status ok")
+println("SKIP: handle_dharma_room_turn_agentic tool_pending propagation (requires API + MCP bridge)")
+println("      Expected: tool_pending in response when loop suspends for mcp__* tool")
+println("SKIP: handle_chat_agentic connector tools end-to-end (requires API + neuron-connectd)")
+println("      Expected: mcp__* tool names appear in tools_used when connectd is running")
+
+// ── Summary ───────────────────────────────────────────────────────────────────
+
+println("")
+println("agentic tools tests: " + int_to_str(pass_count) + " passed, " + int_to_str(fail_count) + " failed")