Files
neuron/stewardship.el
2026-06-11 11:30:39 -05:00

142 lines
6.1 KiB
EmacsLisp

// stewardship.el Layer 2: Stewardship
// Mission alignment and CGI governance. Sits between L1 (Safety) and L3 (Imprint).
// Every request passes through steward_align() before reaching the imprint.
// Every self-modification action passes through steward_cgi_check().
// All stewardship events are logged to engram as StewardshipEvent nodes.
import "memory.el"
// steward_log_event write a StewardshipEvent node to engram.
// Called by all other stewardship functions.
fn steward_log_event(kind: String, detail: String) -> Void {
let content: String = "STEWARD:" + kind + " | " + detail
let tags: String = "[\"stewardship\",\"steward:" + kind + "\"]"
let discard: String = engram_node_full(
content,
"StewardshipEvent",
"steward:" + kind,
el_from_float(0.85),
el_from_float(0.85),
el_from_float(0.9),
"Episodic",
tags
)
}
// steward_get_mission retrieve the canonical mission statement.
// Searches engram for a config node labelled "steward:mission".
// Falls back to hardcoded mission if no node is found.
fn steward_get_mission() -> String {
let results: String = engram_search_json("steward:mission", 3)
let found: Bool = !str_eq(results, "") && !str_eq(results, "[]")
if found {
let node: String = json_array_get(results, 0)
let node_type: String = json_get(node, "node_type")
let content: String = json_get(node, "content")
let has_content: Bool = !str_eq(content, "")
if str_eq(node_type, "Config") && has_content {
return content
}
// Non-Config result use content if non-empty, else fall through to default
if has_content {
return content
}
}
return "Neuron exists to extend human capability with integrity — never to deceive, manipulate, or accumulate power over the people it serves."
}
// steward_align check input for mission-conflict signals before it reaches the imprint.
// Returns {"action":"pass","content":"<input>"} when clean.
// Returns {"action":"redirect","reason":"mission conflict: <signal>","redirect_to":"<safe reframe>"}
// when a misalignment signal is detected. Logs all misalignment events to engram.
fn steward_align(input: String, imprint_id: String) -> String {
// Check each misalignment signal in sequence.
// Signals: manipulate | deceive the user | hide from | gain control | override safety
let signal_manipulate: Bool = str_contains(input, "manipulate")
let signal_deceive: Bool = str_contains(input, "deceive the user")
let signal_hide: Bool = str_contains(input, "hide from")
let signal_control: Bool = str_contains(input, "gain control")
let signal_override: Bool = str_contains(input, "override safety")
let matched: String = if signal_manipulate { "manipulate" } else {
if signal_deceive { "deceive the user" } else {
if signal_hide { "hide from" } else {
if signal_control { "gain control" } else {
if signal_override { "override safety" } else { "" }
}
}
}
}
let misaligned: Bool = !str_eq(matched, "")
if misaligned {
// Log the misalignment event before redirecting
let detail: String = "imprint=" + imprint_id + " signal=\"" + matched + "\""
steward_log_event("misalignment", detail)
// Build a safe reframe: strip the conflict signal and steer toward the mission
let safe_reframe: String = "How can I help you achieve this goal in a way that respects the user and maintains trust?"
let safe_matched: String = json_safe(matched)
let safe_reframe_escaped: String = json_safe(safe_reframe)
return "{\"action\":\"redirect\",\"reason\":\"mission conflict: " + safe_matched + "\",\"redirect_to\":\"" + safe_reframe_escaped + "\"}"
}
// No misalignment pass through
let safe_input: String = json_safe(input)
return "{\"action\":\"pass\",\"content\":\"" + safe_input + "\"}"
}
// steward_validate_imprint check whether a tool is authorized for the given imprint.
// Standard tools are always authorized.
// Platform-only tools require state_get("platform_auth") == "true".
fn steward_validate_imprint(imprint_id: String, tool_name: String) -> String {
// Platform-only tools requiring elevated authorization
let is_platform_tool: Bool = str_eq(tool_name, "safety_override")
|| str_eq(tool_name, "identity_modify")
|| str_eq(tool_name, "value_update")
|| str_eq(tool_name, "capability_expand")
if !is_platform_tool {
return "{\"authorized\":true}"
}
// Platform tool check authorization state
let auth: String = state_get("platform_auth")
let authorized: Bool = str_eq(auth, "true")
if authorized {
return "{\"authorized\":true}"
}
// Log the unauthorized attempt
let detail: String = "imprint=" + imprint_id + " tool=" + tool_name + " platform_auth=false"
steward_log_event("auth_denied", detail)
return "{\"authorized\":false,\"reason\":\"platform authorization required\"}"
}
// steward_cgi_check gate self-modification and capability-expansion actions behind CGI review.
// CGI-gated actions: self_modification | value_update | identity_change | capability_expansion
// Returns {"approved":true} for non-gated actions.
// Returns {"approved":false,"requires":"cgi_review","action":"<action>"} for gated actions.
// All CGI checks are logged to engram as StewardshipEvent nodes.
fn steward_cgi_check(action: String) -> String {
let is_gated: Bool = str_eq(action, "self_modification")
|| str_eq(action, "value_update")
|| str_eq(action, "identity_change")
|| str_eq(action, "capability_expansion")
// Log every CGI check regardless of outcome
let detail: String = "action=" + action + " gated=" + if is_gated { "true" } else { "false" }
steward_log_event("cgi_check", detail)
if is_gated {
let safe_action: String = json_safe(action)
return "{\"approved\":false,\"requires\":\"cgi_review\",\"action\":\"" + safe_action + "\"}"
}
return "{\"approved\":true}"
}