// stewardship.el — Layer 2: Stewardship
// Mission alignment and CGI governance. Sits between L1 (Safety) and L3 (Imprint).
// Every request passes through steward_align() before reaching the imprint.
// Every self-modification action passes through steward_cgi_check().
// All stewardship events are logged to engram as StewardshipEvent nodes.
|
|
|
|
import "memory.el"
|
|
|
|
// steward_log_event — record a stewardship event in engram.
//
// Writes one node of type "StewardshipEvent" through engram_node_full.
// The node content is "STEWARD:<kind> | <detail>", "steward:<kind>" is passed
// as the third argument (presumably the node label — confirm against
// engram_node_full), and the tags are ["stewardship", "steward:<kind>"].
//
// kind   — short event category, e.g. "misalignment", "auth_denied", "cgi_check".
// detail — free-form description; stored as plain text inside the node content.
//
// Returns nothing. The value returned by engram_node_full is ignored.
fn steward_log_event(kind: String, detail: String) -> Void {
    let label: String = "steward:" + kind
    let content: String = "STEWARD:" + kind + " | " + detail

    // tags is a JSON array assembled by string concatenation, so kind is
    // escaped with json_safe (as every other hand-built JSON string in this
    // file does) to keep the array well-formed if kind ever contains a quote
    // or backslash.
    let tags: String = "[\"stewardship\",\"steward:" + json_safe(kind) + "\"]"

    // NOTE(review): the three numeric arguments and "Episodic" are passed
    // through as-is; their meaning is defined by engram_node_full, which is
    // not visible in this file.
    let discard: String = engram_node_full(
        content,
        "StewardshipEvent",
        label,
        el_from_float(0.85),
        el_from_float(0.85),
        el_from_float(0.9),
        "Episodic",
        tags
    )
}
|
|
|
|
// steward_get_mission — retrieve the canonical mission statement.
//
// Runs engram_search_json("steward:mission", 3) and, when the result is
// neither "" nor "[]", returns the "content" field of the FIRST result if
// that field is non-empty. In every other case the hardcoded mission
// statement below is returned.
//
// Takes no parameters. Returns the mission text as a String.
//
// NOTE(review): the node type of the result is not enforced — a Config node
// and a non-Config node are treated the same (the previous code had two
// branches that both returned content). If only Config nodes should be able
// to define the mission, this needs an explicit node_type check; as written,
// any node the search ranks first supplies the mission, which presumably
// includes logged StewardshipEvent nodes whose text matches the query —
// confirm against engram_search_json's matching rules.
fn steward_get_mission() -> String {
    let results: String = engram_search_json("steward:mission", 3)
    let found: Bool = !str_eq(results, "") && !str_eq(results, "[]")

    if found {
        // Only the top-ranked result is consulted; the other results are ignored.
        let node: String = json_array_get(results, 0)
        let content: String = json_get(node, "content")
        if !str_eq(content, "") {
            return content
        }
    }

    // No usable node — fall back to the built-in mission statement.
    return "Neuron exists to extend human capability with integrity — never to deceive, manipulate, or accumulate power over the people it serves."
}
|
|
|
|
// steward_first_signal — return the first mission-conflict phrase found in input.
//
// Phrases are tested with str_contains in fixed priority order:
//   manipulate > deceive the user > hide from > gain control > override safety
// Returns the matching phrase, or "" when none of them occurs in input.
// NOTE(review): the phrases are passed to str_contains exactly as written;
// whether differently-cased input matches depends on str_contains — confirm.
fn steward_first_signal(input: String) -> String {
    if str_contains(input, "manipulate") {
        return "manipulate"
    }
    if str_contains(input, "deceive the user") {
        return "deceive the user"
    }
    if str_contains(input, "hide from") {
        return "hide from"
    }
    if str_contains(input, "gain control") {
        return "gain control"
    }
    if str_contains(input, "override safety") {
        return "override safety"
    }
    return ""
}

// steward_align — screen input for mission-conflict signals before it reaches the imprint.
//
// input      — the request text to screen.
// imprint_id — identifier of the target imprint; only used in the log entry.
//
// Returns a JSON object as a String:
//   clean input  -> {"action":"pass","content":"<input>"}
//   signal found -> {"action":"redirect","reason":"mission conflict: <signal>","redirect_to":"<safe reframe>"}
// When a signal is found, a "misalignment" event is logged through
// steward_log_event before the redirect is returned.
fn steward_align(input: String, imprint_id: String) -> String {
    let signal: String = steward_first_signal(input)

    // Clean input: hand it back (escaped for JSON) without logging anything.
    if str_eq(signal, "") {
        return "{\"action\":\"pass\",\"content\":\"" + json_safe(input) + "\"}"
    }

    // Record which imprint was targeted and which phrase triggered the redirect.
    steward_log_event("misalignment", "imprint=" + imprint_id + " signal=\"" + signal + "\"")

    // The redirect always carries the same fixed reframing prompt.
    let reframe: String = "How can I help you achieve this goal in a way that respects the user and maintains trust?"
    let reason_field: String = "\"reason\":\"mission conflict: " + json_safe(signal) + "\""
    let redirect_field: String = "\"redirect_to\":\"" + json_safe(reframe) + "\""

    return "{\"action\":\"redirect\"," + reason_field + "," + redirect_field + "}"
}
|
|
|
|
// steward_validate_imprint — decide whether tool_name may be used.
//
// imprint_id — identifier of the requesting imprint; only used in the log entry.
// tool_name  — name of the tool being requested.
//
// Platform-only tools are safety_override, identity_modify, value_update and
// capability_expand. They are authorized only while
// state_get("platform_auth") equals "true". Any other tool name is always
// authorized.
//
// Returns {"authorized":true}, or
// {"authorized":false,"reason":"platform authorization required"} after
// logging an "auth_denied" event through steward_log_event.
fn steward_validate_imprint(imprint_id: String, tool_name: String) -> String {
    let needs_platform_auth: Bool = str_eq(tool_name, "safety_override")
        || str_eq(tool_name, "identity_modify")
        || str_eq(tool_name, "value_update")
        || str_eq(tool_name, "capability_expand")

    if needs_platform_auth {
        // The authorization flag is only read for platform-only tools.
        let auth_state: String = state_get("platform_auth")
        if !str_eq(auth_state, "true") {
            // Leave an audit record of the refused attempt before answering.
            let denial: String = "imprint=" + imprint_id + " tool=" + tool_name + " platform_auth=false"
            steward_log_event("auth_denied", denial)
            return "{\"authorized\":false,\"reason\":\"platform authorization required\"}"
        }
    }

    // Standard tool, or platform tool with authorization in place.
    return "{\"authorized\":true}"
}
|
|
|
|
// steward_cgi_check — gate self-modification and capability-expansion actions behind CGI review.
//
// action — name of the action being requested.
//
// Gated actions: self_modification, value_update, identity_change,
// capability_expansion (exact string match).
//
// Returns {"approved":true} for any other action, and
// {"approved":false,"requires":"cgi_review","action":"<action>"} for a gated one.
// Every call, gated or not, logs a "cgi_check" event through steward_log_event.
fn steward_cgi_check(action: String) -> String {
    let is_gated: Bool = str_eq(action, "self_modification")
        || str_eq(action, "value_update")
        || str_eq(action, "identity_change")
        || str_eq(action, "capability_expansion")

    // The audit entry is written before the outcome is decided, so both
    // approved and refused checks are recorded.
    let gated_label: String = if is_gated { "true" } else { "false" }
    steward_log_event("cgi_check", "action=" + action + " gated=" + gated_label)

    if !is_gated {
        return "{\"approved\":true}"
    }

    // Gated: echo the (JSON-escaped) action back so the caller can route it to review.
    return "{\"approved\":false,\"requires\":\"cgi_review\",\"action\":\"" + json_safe(action) + "\"}"
}
|