neuron/stewardship.el

// stewardship.el — Layer 2: Stewardship
// Mission alignment and CGI governance. Sits between L1 (Safety) and L3 (Imprint).
// Every request passes through steward_align() before reaching the imprint.
// Every self-modification action passes through steward_cgi_check().
// All stewardship events are logged to engram as StewardshipEvent nodes.

import "memory.el"

// steward_log_event — write a StewardshipEvent node to engram.
// Called by the other stewardship functions in this file to record their events.
//
// kind:   short event category (callers in this file pass "misalignment",
//         "auth_denied" and "cgi_check"). Used in the node content, in the
//         "steward:<kind>" string passed as the third argument, and in the tag list.
// detail: free-form description appended to the node content.
//
// Returns nothing; the value returned by engram_node_full is discarded.
fn steward_log_event(kind: String, detail: String) -> Void {
    let content: String = "STEWARD:" + kind + " | " + detail
    // tags is a JSON array literal, so kind goes through json_safe before being
    // embedded, as the other JSON-building code in this file does. Without it a
    // quote or backslash in kind would yield a malformed tag list.
    let tags: String = "[\"stewardship\",\"steward:" + json_safe(kind) + "\"]"
    // NOTE(review): the three numeric arguments and "Episodic" are passed through
    // as-is; their meaning is defined by engram_node_full, which is not visible here.
    let discard: String = engram_node_full(
        content,
        "StewardshipEvent",
        "steward:" + kind,
        el_from_float(0.85),
        el_from_float(0.85),
        el_from_float(0.9),
        "Episodic",
        tags
    )
}

// steward_get_mission — retrieve the canonical mission statement.
// Queries engram for "steward:mission" (requesting up to 3 results) and returns
// the "content" field of the first result when that field is non-empty.
// Falls back to the hardcoded mission when the search returns "" or "[]", or
// when the first result has no content.
//
// NOTE(review): node_type is not consulted — the first hit is used whatever its
// type. A Config-only check would change nothing as long as non-Config content is
// also accepted, so it is omitted. If only Config nodes should be allowed to
// define the mission, add that filter here — confirm the intended policy.
fn steward_get_mission() -> String {
    let results: String = engram_search_json("steward:mission", 3)
    let found: Bool = !str_eq(results, "") && !str_eq(results, "[]")
    if found {
        // Only the top-ranked result is inspected.
        let node: String = json_array_get(results, 0)
        let content: String = json_get(node, "content")
        if !str_eq(content, "") {
            return content
        }
    }
    return "Neuron exists to extend human capability with integrity — never to deceive, manipulate, or accumulate power over the people it serves."
}

// steward_first_signal — return the first mission-conflict phrase found in input.
// Phrases are checked in priority order:
//   manipulate | deceive the user | hide from | gain control | override safety
// Returns "" when none of them occurs.
// NOTE(review): matching relies on str_contains; whether that is case-sensitive
// is not visible here — confirm.
fn steward_first_signal(input: String) -> String {
    if str_contains(input, "manipulate") {
        return "manipulate"
    }
    if str_contains(input, "deceive the user") {
        return "deceive the user"
    }
    if str_contains(input, "hide from") {
        return "hide from"
    }
    if str_contains(input, "gain control") {
        return "gain control"
    }
    if str_contains(input, "override safety") {
        return "override safety"
    }
    return ""
}

// steward_align — screen input for mission-conflict signals before it reaches the imprint.
//
// input:      the request text to screen.
// imprint_id: identifier of the target imprint; only used in the log entry.
//
// Returns {"action":"pass","content":"<input>"} when no signal is present.
// Returns {"action":"redirect","reason":"mission conflict: <signal>","redirect_to":"<reframe>"}
// when one is. The reframe is a fixed question; the original input is not echoed
// back or edited. Each detected conflict is logged as a "misalignment" event.
fn steward_align(input: String, imprint_id: String) -> String {
    let signal: String = steward_first_signal(input)

    // Clean input: hand it back, escaped for embedding in the JSON envelope.
    if str_eq(signal, "") {
        return "{\"action\":\"pass\",\"content\":\"" + json_safe(input) + "\"}"
    }

    // Record the conflict before building the redirect response.
    steward_log_event("misalignment", "imprint=" + imprint_id + " signal=\"" + signal + "\"")

    let reason: String = "mission conflict: " + json_safe(signal)
    let redirect_to: String = json_safe("How can I help you achieve this goal in a way that respects the user and maintains trust?")
    return "{\"action\":\"redirect\",\"reason\":\"" + reason + "\",\"redirect_to\":\"" + redirect_to + "\"}"
}

// steward_validate_imprint — decide whether tool_name may be used by the given imprint.
//
// imprint_id: identifier of the requesting imprint; only used in the denial log entry.
// tool_name:  name of the tool being requested.
//
// Tools outside the platform-only list are always authorized.
// Platform-only tools (safety_override, identity_modify, value_update,
// capability_expand) are authorized only when state_get("platform_auth") is
// exactly "true".
//
// Returns {"authorized":true}, or
// {"authorized":false,"reason":"platform authorization required"} after logging
// an "auth_denied" event.
fn steward_validate_imprint(imprint_id: String, tool_name: String) -> String {
    let needs_platform_auth: Bool = str_eq(tool_name, "safety_override")
        || str_eq(tool_name, "identity_modify")
        || str_eq(tool_name, "value_update")
        || str_eq(tool_name, "capability_expand")

    if needs_platform_auth {
        // Authorization state is only read for platform-only tools.
        let granted: Bool = str_eq(state_get("platform_auth"), "true")
        if !granted {
            // Denied: leave a trace of who asked for what, then refuse.
            steward_log_event("auth_denied", "imprint=" + imprint_id + " tool=" + tool_name + " platform_auth=false")
            return "{\"authorized\":false,\"reason\":\"platform authorization required\"}"
        }
    }

    // Standard tool, or platform tool with authorization in place.
    return "{\"authorized\":true}"
}

// steward_cgi_check — hold self-modification and capability-expansion actions for CGI review.
//
// action: name of the action being attempted.
//
// Gated actions: self_modification | value_update | identity_change | capability_expansion
// Returns {"approved":true} for any other action.
// Returns {"approved":false,"requires":"cgi_review","action":"<action>"} for a gated one.
// Every call logs a "cgi_check" event, whether or not the action is gated.
fn steward_cgi_check(action: String) -> String {
    let needs_review: Bool = str_eq(action, "self_modification")
        || str_eq(action, "value_update")
        || str_eq(action, "identity_change")
        || str_eq(action, "capability_expansion")

    // The log entry is written before the outcome is returned, for both outcomes.
    let gated_flag: String = if needs_review { "true" } else { "false" }
    steward_log_event("cgi_check", "action=" + action + " gated=" + gated_flag)

    if !needs_review {
        return "{\"approved\":true}"
    }

    // action is escaped with json_safe before being embedded in the JSON reply.
    return "{\"approved\":false,\"requires\":\"cgi_review\",\"action\":\"" + json_safe(action) + "\"}"
}