// tests/test_agentic_tools.el
// Tests for the agentic tools wiring (PR #19: fix/agentic-tools-all).
//
// Covers:
//   1. agentic_tools_all() includes all literal tool names
//   2. agentic_tools_all() includes the native web_search tool
//   3. connector_tools_json() returns valid JSON ([] or array) even when bridge is down
//   4. agentic_tools_all() output stays valid JSON when connector bridge is down
//   5. tool_pending envelope detection — the pattern used in handle_dharma_room_turn_agentic
//      to distinguish a suspended agentic loop from a normal reply
//   6. Empty-reply guard — json_get("reply") returns "" on a tool_pending envelope,
//      confirming that the guard is necessary to avoid silent empty responses
//
// Tests 5 and 6 validate the El-level logic that guards handle_dharma_room_turn_agentic
// against silent failures after the refactor to use agentic_loop.
//
// Tests 1-4 are pure: no network, no LLM, no engram.
// Tests 5-6 are pure string/JSON operations on synthesized envelopes.
//
// Integration tests (LLM-live) are documented as SKIP stubs because they
// require a valid ANTHROPIC_API_KEY and a running soul + neuron-connectd.
import "../chat.el"

// Running totals, reported by the summary line at the end of the file.
let pass_count: Int = 0
let fail_count: Int = 0

// NOTE(review): every helper below bumps its counter with
// `let pass_count = pass_count + 1` inside a function body. This assumes El
// rebinds the file-level counter rather than creating a function-local
// shadow — confirm against El scoping rules; if it shadows, the summary
// line will always report 0 passed, 0 failed.

// Passes when `got` and `expected` are equal according to str_eq.
// On failure prints both values so the mismatch is visible in the log.
fn assert_eq(label: String, got: String, expected: String) -> Void {
    if str_eq(got, expected) {
        let pass_count = pass_count + 1
        println(" PASS: " + label)
    } else {
        let fail_count = fail_count + 1
        println(" FAIL: " + label)
        println(" got: " + got)
        println(" expected: " + expected)
    }
}

// Passes when `cond` is true. No detail is printed on failure beyond the label.
fn assert_true(label: String, cond: Bool) -> Void {
    if cond {
        let pass_count = pass_count + 1
        println(" PASS: " + label)
    } else {
        let fail_count = fail_count + 1
        println(" FAIL: " + label)
    }
}

// Passes when `needle` occurs in `haystack` according to str_contains.
// On failure prints the missing needle and the full haystack.
fn assert_contains(label: String, haystack: String, needle: String) -> Void {
    if str_contains(haystack, needle) {
        let pass_count = pass_count + 1
        println(" PASS: " + label)
    } else {
        let fail_count = fail_count + 1
        println(" FAIL: " + label)
        println(" missing '" + needle + "' in: " + haystack)
    }
}

// Passes when `s` has a length greater than zero.
fn assert_not_empty(label: String, s: String) -> Void {
    if str_len(s) > 0 {
        let pass_count = pass_count + 1
        println(" PASS: " + label)
    } else {
        let fail_count = fail_count + 1
        println(" FAIL: " + label)
        println(" got empty string")
    }
}

// Passes when the last character of `s` is "]".
// An empty `s` is reported as an ordinary failure (got "", expected "]")
// without slicing, so str_slice is never called with a start index of -1.
fn assert_ends_with_bracket(label: String, s: String) -> Void {
    if str_len(s) > 0 {
        assert_eq(label, str_slice(s, str_len(s) - 1, str_len(s)), "]")
    } else {
        assert_eq(label, "", "]")
    }
}

// ── Section 1: agentic_tools_all contains all literal tool names ──────────────
println("")
println("1. agentic_tools_all() — contains all literal tool names")

// Fetched once and reused by Sections 1, 2 and 4.
let all_tools: String = agentic_tools_all()

assert_contains("contains read_file", all_tools, "\"name\":\"read_file\"")
assert_contains("contains write_file", all_tools, "\"name\":\"write_file\"")
assert_contains("contains web_get", all_tools, "\"name\":\"web_get\"")
assert_contains("contains search_memory", all_tools, "\"name\":\"search_memory\"")
assert_contains("contains run_command", all_tools, "\"name\":\"run_command\"")

// ── Section 2: agentic_tools_all includes native web_search ──────────────────
println("")
println("2. agentic_tools_all() — includes native web_search_20250305 tool")

assert_contains("contains web_search type", all_tools, "web_search_20250305")
assert_contains("contains web_search name", all_tools, "\"name\":\"web_search\"")

// ── Section 3: connector_tools_json returns valid JSON when bridge is down ────
println("")
println("3. connector_tools_json() — returns [] when neuron-connectd is not running")

// connector_tools_json() calls the bridge; in a unit-test environment it is
// expected to return "[]" (graceful degradation). If the bridge IS running,
// it returns a non-empty array — both are valid.
let conn_tools: String = connector_tools_json()
let starts_bracket: Bool = str_starts_with(conn_tools, "[")
assert_true("connector_tools_json starts with [", starts_bracket)
assert_not_empty("connector_tools_json is non-empty string", conn_tools)
// Both "[]" and a populated array end with "]"; checking only the leading
// bracket would accept truncated output such as "[".
assert_ends_with_bracket("connector_tools_json ends with ]", conn_tools)

// ── Section 4: agentic_tools_all output is valid JSON array ──────────────────
println("")
println("4. agentic_tools_all() — output is a JSON array")

assert_true("starts with [", str_starts_with(all_tools, "["))
// A JSON array ends with ]
assert_ends_with_bracket("ends with ]", all_tools)

// ── Section 5: tool_pending envelope detection ────────────────────────────────
//
// This validates the detection logic added to handle_dharma_room_turn_agentic:
//
//   let is_pending: Bool = str_eq(json_get(loop_result, "tool_pending"), "true")
//       || str_starts_with(loop_result, "{\"tool_pending\":true")
//
// When agentic_loop suspends for an MCP bridge tool it returns:
//   {"tool_pending":true,"session_id":"...","call_id":"...","tool_name":"...","tool_input":{...},...}
//
// json_get() on a Bool field may return "true" (string) or "" depending on El runtime.
// The str_starts_with fallback guards against either representation.
println("")
println("5. tool_pending envelope detection patterns")

let pending_envelope: String = "{\"tool_pending\":true,\"session_id\":\"dharma:br-1234-1\",\"call_id\":\"toolu_01\",\"tool_name\":\"mcp__filesystem__read\",\"tool_input\":{\"path\":\"/tmp/x\"},\"model\":\"claude-sonnet-4-5\",\"agentic\":true,\"tools_used\":[]}"
let normal_envelope: String = "{\"reply\":\"Hello from the soul.\",\"model\":\"claude-sonnet-4-5\",\"agentic\":true,\"tools_used\":[]}"
let error_envelope: String = "{\"error\":\"llm unavailable\",\"reply\":\"\"}"

// str_starts_with fallback — always works regardless of how json_get handles bool
assert_true("pending envelope: str_starts_with detects tool_pending=true", str_starts_with(pending_envelope, "{\"tool_pending\":true"))
assert_true("normal reply: str_starts_with does not detect tool_pending", !str_starts_with(normal_envelope, "{\"tool_pending\":true"))
assert_true("error envelope: str_starts_with does not detect tool_pending", !str_starts_with(error_envelope, "{\"tool_pending\":true"))

// Full two-branch expression exactly as documented above, so the json_get
// half is exercised too. For the non-pending envelopes this relies on
// json_get returning a value other than "true" when the key is absent
// (Section 6 asserts "" for absent keys).
let pending_detected: Bool = str_eq(json_get(pending_envelope, "tool_pending"), "true") || str_starts_with(pending_envelope, "{\"tool_pending\":true")
assert_true("pending envelope: combined detection reports pending", pending_detected)
let normal_detected: Bool = str_eq(json_get(normal_envelope, "tool_pending"), "true") || str_starts_with(normal_envelope, "{\"tool_pending\":true")
assert_true("normal reply: combined detection does not report pending", !normal_detected)
let error_detected: Bool = str_eq(json_get(error_envelope, "tool_pending"), "true") || str_starts_with(error_envelope, "{\"tool_pending\":true")
assert_true("error envelope: combined detection does not report pending", !error_detected)

// ── Section 6: empty-reply guard necessity ────────────────────────────────────
//
// Confirms that json_get(pending_envelope, "reply") returns "" — proving the
// empty-reply guard is necessary to avoid a silent success with empty response.
// Without the guard, the old code would return {"response":"","cgi_id":"..."} which
// is indistinguishable from a successful LLM response.
println("")
println("6. empty-reply guard — json_get(pending, \"reply\") is empty")

let pending_reply: String = json_get(pending_envelope, "reply")
assert_eq("json_get reply on pending envelope is empty", pending_reply, "")
let normal_reply: String = json_get(normal_envelope, "reply")
assert_not_empty("json_get reply on normal envelope is non-empty", normal_reply)

// Also confirm error key absent from normal reply and pending envelopes
let pending_error: String = json_get(pending_envelope, "error")
assert_eq("pending envelope has no error key", pending_error, "")
let normal_error: String = json_get(normal_envelope, "error")
assert_eq("normal envelope has no error key", normal_error, "")

// The error envelope is the other case with an empty reply: its "reply" is ""
// while "error" carries the message, so a caller that reads only "reply"
// would see the same empty string as in the pending case.
let error_reply: String = json_get(error_envelope, "reply")
assert_eq("json_get reply on error envelope is empty", error_reply, "")
let error_message: String = json_get(error_envelope, "error")
assert_not_empty("json_get error on error envelope is non-empty", error_message)

// ── SKIP stubs: integration tests requiring live LLM ─────────────────────────
println("")
println("SKIP: handle_dharma_room_turn_agentic happy-path (requires ANTHROPIC_API_KEY + soul)")
println(" Expected: non-empty response field and status ok")
println("SKIP: handle_dharma_room_turn_agentic tool_pending propagation (requires API + MCP bridge)")
println(" Expected: tool_pending in response when loop suspends for mcp__* tool")
println("SKIP: handle_chat_agentic connector tools end-to-end (requires API + neuron-connectd)")
println(" Expected: mcp__* tool names appear in tools_used when connectd is running")

// ── Summary ───────────────────────────────────────────────────────────────────
println("")
println("agentic tools tests: " + int_to_str(pass_count) + " passed, " + int_to_str(fail_count) + " failed")