// stewardship.el — Layer 2: Stewardship
//
// Mission alignment and CGI governance. Sits between L1 (Safety) and L3 (Imprint).
// Every request passes through steward_align() before reaching the imprint.
// Every self-modification action passes through steward_cgi_check().
// All stewardship events are logged to engram as StewardshipEvent nodes.

import "memory.el"
|
|
|
|
// steward_log_event — record one stewardship event.
//
// Stores a "StewardshipEvent" node in engram whose content is
// "STEWARD:<kind> | <detail>", labelled and tagged with "steward:<kind>",
// then echoes "[steward] <kind> | <detail>" via println.
// Every other stewardship function in this file reports through here.
fn steward_log_event(kind: String, detail: String) -> Void {
    let summary: String = kind + " | " + detail
    let label: String = "steward:" + kind
    let tag_list: String = "[\"stewardship\",\"" + label + "\"]"

    // The value returned by engram_node_full is not needed; it is bound only to discard it.
    let ignored: String = engram_node_full(
        "STEWARD:" + summary,
        "StewardshipEvent",
        label,
        el_from_float(0.85),
        el_from_float(0.85),
        el_from_float(0.9),
        "Episodic",
        tag_list
    )

    println("[steward] " + summary)
}
|
|
|
|
// steward_get_mission — retrieve the canonical mission statement.
//
// Searches engram for "steward:mission" (up to 3 hits) and returns the content of
// the first hit whose node_type is "Config" and whose content is non-empty.
// Only Config nodes are authoritative for the mission statement; any other hit is
// skipped. Falls back to the hardcoded mission when no such node is found.
fn steward_get_mission() -> String {
    let fallback: String = "Neuron exists to extend human capability with integrity — never to deceive, manipulate, or accumulate power over the people it serves."

    let results: String = engram_search_json("steward:mission", 3)
    let nothing_found: Bool = str_eq(results, "") || str_eq(results, "[]")
    if nothing_found {
        return fallback
    }

    // Inspect every returned hit, not just the first one: a non-Config node that
    // happens to rank first must not hide a valid Config node further down.
    let total: Int = json_array_len(results)
    let mission: String = ""
    let i: Int = 0
    while i < total {
        let node: String = json_array_get(results, i)
        let node_type: String = json_get(node, "node_type")
        let content: String = json_get(node, "content")
        // Keep the first authoritative hit; later hits never overwrite it.
        let usable: Bool = str_eq(mission, "") && str_eq(node_type, "Config") && !str_eq(content, "")
        let mission = if usable { content } else { mission }
        let i = i + 1
    }

    if str_eq(mission, "") {
        return fallback
    }
    return mission
}
|
|
|
|
// steward_align — screen an incoming request for mission-conflict phrases before
// it is handed to the imprint.
//
// Phrases checked, in priority order (the first one present wins):
//   manipulate | deceive | hide from the user | gain control | override safety
// Clean input   -> {"action":"pass","content":"<input>"}
// Flagged input -> {"action":"redirect","reason":"mission conflict: <signal>","redirect_to":"<safe reframe>"}
// A flagged input is logged as a "misalignment" stewardship event before the redirect is returned.
fn steward_align(input: String, imprint_id: String) -> String {
    // Resolve the first matching phrase, or "" when none is present.
    let signal: String = if str_contains(input, "manipulate") { "manipulate" } else {
        if str_contains(input, "deceive") { "deceive" } else {
            if str_contains(input, "hide from the user") { "hide from the user" } else {
                if str_contains(input, "gain control") { "gain control" } else {
                    if str_contains(input, "override safety") { "override safety" } else { "" }
                }
            }
        }
    }

    // Nothing matched — hand the input through unchanged (JSON-escaped).
    if str_eq(signal, "") {
        return "{\"action\":\"pass\",\"content\":\"" + json_safe(input) + "\"}"
    }

    // Record the event first, then answer with the fixed mission-aligned reframe.
    steward_log_event("misalignment", "imprint=" + imprint_id + " signal=\"" + signal + "\"")

    let reframe: String = "How can I help you achieve this goal in a way that respects the user and maintains trust?"
    return "{\"action\":\"redirect\",\"reason\":\"mission conflict: " + json_safe(signal) + "\",\"redirect_to\":\"" + json_safe(reframe) + "\"}"
}
|
|
|
|
// steward_validate_imprint — decide whether a tool may be used by the given imprint.
//
// The platform-only tools (safety_override, identity_modify, value_update,
// capability_expand) are authorized only while state_get("platform_auth") is "true".
// Every other tool is always authorized.
// Returns {"authorized":true}, or
// {"authorized":false,"reason":"platform authorization required"} after logging
// an "auth_denied" stewardship event.
fn steward_validate_imprint(imprint_id: String, tool_name: String) -> String {
    let granted: String = "{\"authorized\":true}"

    let needs_platform_auth: Bool = str_eq(tool_name, "safety_override") || str_eq(tool_name, "identity_modify") || str_eq(tool_name, "value_update") || str_eq(tool_name, "capability_expand")

    if needs_platform_auth {
        // The authorization state is only consulted for platform-only tools.
        if str_eq(state_get("platform_auth"), "true") {
            return granted
        }

        // Denied: leave an audit trail before refusing.
        steward_log_event("auth_denied", "imprint=" + imprint_id + " tool=" + tool_name + " platform_auth=false")
        return "{\"authorized\":false,\"reason\":\"platform authorization required\"}"
    }

    return granted
}
|
|
|
|
// steward_cgi_check — hold self-modification and capability-expansion actions for CGI review.
//
// Gated actions: self_modification | value_update | identity_change | capability_expansion
// Gated     -> {"approved":false,"requires":"cgi_review","action":"<action>"}
// Not gated -> {"approved":true}
// Every call is logged as a "cgi_check" stewardship event, whatever the outcome.
fn steward_cgi_check(action: String) -> String {
    let needs_review: Bool = str_eq(action, "self_modification") || str_eq(action, "value_update") || str_eq(action, "identity_change") || str_eq(action, "capability_expansion")

    // The log entry is written before the verdict is returned, for both outcomes.
    let gated_flag: String = if needs_review { "true" } else { "false" }
    steward_log_event("cgi_check", "action=" + action + " gated=" + gated_flag)

    if !needs_review {
        return "{\"approved\":true}"
    }

    return "{\"approved\":false,\"requires\":\"cgi_review\",\"action\":\"" + json_safe(action) + "\"}"
}
|
|
|
|
// steward_fingerprint_session — reduce the current input to a 6-dimension behavioral fingerprint.
//
// Dimensions and bucket values:
//   avg_word_len  1 = average <= 4, 2 = average <= 6, 3 = longer
//   punct         1 = at most three of . ? ! ,   2 = more than three
//   len           1 = under 50 chars, 2 = 50 to 200, 3 = over 200
//   question      1 = input contains "?", otherwise 0
//   formality     2 = contains "please", "could you", "would you" or "I would"; otherwise 1
//   time          1..4 = six-hour block of the day derived from time_now()
// Stores the fingerprint as a "BehaviorSample" node in engram (label "behavior:<session_id>")
// and returns it as a JSON object whose values are strings.
fn steward_fingerprint_session(input: String, session_id: String) -> String {
    let input_len: Int = str_len(input)

    // One scan over the input gathers both the space count and the punctuation count.
    let spaces: Int = 0
    let puncts: Int = 0
    let idx: Int = 0
    while idx < input_len {
        let ch: String = str_slice(input, idx, idx + 1)
        let spaces = if str_eq(ch, " ") { spaces + 1 } else { spaces }
        let is_punct: Bool = str_eq(ch, ".") || str_eq(ch, "?") || str_eq(ch, "!") || str_eq(ch, ",")
        let puncts = if is_punct { puncts + 1 } else { puncts }
        let idx = idx + 1
    }

    // Dimension 1: average word length, approximated as
    // non-space chars / (spaces + 1) using integer division.
    // The divisor is at least 1, so no zero check is needed.
    let words: Int = spaces + 1
    let letters: Int = input_len - spaces
    let avg_len: Int = letters / words
    let avg_word_len: Int = if avg_len <= 4 { 1 } else { if avg_len <= 6 { 2 } else { 3 } }

    // Dimension 2: punctuation density.
    let punctuation_style: Int = if puncts > 3 { 2 } else { 1 }

    // Dimension 3: message length bucket.
    let message_len_bucket: Int = if input_len < 50 { 1 } else { if input_len <= 200 { 2 } else { 3 } }

    // Dimension 4: presence of a question mark.
    let question_ratio: Int = if str_contains(input, "?") { 1 } else { 0 }

    // Dimension 5: polite phrasing marks the input as formal.
    let polite: Bool = str_contains(input, "please") || str_contains(input, "could you") || str_contains(input, "would you") || str_contains(input, "I would")
    let formality_signal: Int = if polite { 2 } else { 1 }

    // Dimension 6: hour of day = hours - (hours / 24) * 24.
    // The original notes a "% bug" and deliberately avoids both % and *;
    // the product by 24 is therefore built from additions only (16q + 8q).
    // The original comments treat time_now() as unix milliseconds and the hour as UTC.
    let hours: Int = time_now() / 3600000
    let days: Int = hours / 24
    let days_x2: Int = days + days
    let days_x4: Int = days_x2 + days_x2
    let days_x8: Int = days_x4 + days_x4
    let days_x16: Int = days_x8 + days_x8
    let days_x24: Int = days_x16 + days_x8
    let hour_of_day: Int = hours - days_x24
    let time_bucket: Int = if hour_of_day < 6 { 1 } else { if hour_of_day < 12 { 2 } else { if hour_of_day < 18 { 3 } else { 4 } } }

    // Render each dimension once; the strings feed both the stored node and the reply.
    let wl_str: String = int_to_str(avg_word_len)
    let ps_str: String = int_to_str(punctuation_style)
    let lb_str: String = int_to_str(message_len_bucket)
    let qr_str: String = int_to_str(question_ratio)
    let fs_str: String = int_to_str(formality_signal)
    let tb_str: String = int_to_str(time_bucket)

    // Persist the sample so later baseline builds can find it.
    let sample_content: String = "BEHAVIOR_SAMPLE session=" + session_id
        + " avg_word_len=" + wl_str
        + " punct=" + ps_str
        + " len=" + lb_str
        + " question=" + qr_str
        + " formality=" + fs_str
        + " time=" + tb_str
    let ignored: String = engram_node_full(
        sample_content,
        "BehaviorSample",
        "behavior:" + session_id,
        el_from_float(0.6),
        el_from_float(0.5),
        el_from_float(0.8),
        "Episodic",
        "[\"behavior\",\"BehaviorSample\",\"stewardship\"]"
    )

    return "{\"avg_word_len\":\"" + wl_str + "\",\"punct\":\"" + ps_str + "\",\"len\":\"" + lb_str + "\",\"question\":\"" + qr_str + "\",\"formality\":\"" + fs_str + "\",\"time\":\"" + tb_str + "\"}"
}
|
|
|
|
// extract_dim — read a one-character dimension value out of a BEHAVIOR_SAMPLE content string.
//
// Looks up `key` in `content` via str_index_of and returns the single character
// that immediately follows that match.
// Returns "0" when the key is absent or nothing follows it.
fn extract_dim(content: String, key: String) -> String {
    let at: Int = str_index_of(content, key)
    if at < 0 {
        return "0"
    }

    // The value sits directly after the key text.
    let start: Int = at + str_len(key)
    let digit: String = str_slice(content, start, start + 1)
    let out: String = if str_eq(digit, "") { "0" } else { digit }
    return out
}
|
|
|
|
// steward_build_baseline — compute the per-dimension mode over recent BehaviorSample nodes.
//
// Searches engram for up to 20 nodes matching "BEHAVIOR_SAMPLE" and tallies, for each
// of the six fingerprint dimensions, how often every bucket value occurs.
// Returns {"baseline":{...},"sample_count":"<n>"}, or
// {"baseline":null,"sample_count":"<n>"} when fewer than 5 results are available.
// Ties are resolved toward the lowest bucket value.
fn steward_build_baseline() -> String {
    let results: String = engram_search_json("BEHAVIOR_SAMPLE", 20)
    let no_results: Bool = str_eq(results, "") || str_eq(results, "[]")
    if no_results {
        return "{\"baseline\":null,\"sample_count\":\"0\"}"
    }

    let total: Int = json_array_len(results)
    if total < 5 {
        return "{\"baseline\":null,\"sample_count\":\"" + int_to_str(total) + "\"}"
    }

    // Tally counters, one per possible bucket value of each dimension.
    // avg_word_len: values 1-3
    let wl1: Int = 0
    let wl2: Int = 0
    let wl3: Int = 0
    // punct: values 1-2
    let ps1: Int = 0
    let ps2: Int = 0
    // len: values 1-3
    let lb1: Int = 0
    let lb2: Int = 0
    let lb3: Int = 0
    // question: values 0-1
    let qr0: Int = 0
    let qr1: Int = 0
    // formality: values 1-2
    let fs1: Int = 0
    let fs2: Int = 0
    // time: values 1-4
    let tb1: Int = 0
    let tb2: Int = 0
    let tb3: Int = 0
    let tb4: Int = 0

    let bi: Int = 0
    while bi < total {
        let node: String = json_array_get(results, bi)
        let content: String = json_get(node, "content")

        // Keys are matched WITH their leading space. steward_fingerprint_session writes
        // every field as " <name>=<digit>", and a bare "len=" would also match the tail
        // of "avg_word_len=", which appears earlier in the content — extract_dim would
        // then return the avg_word_len value for the len dimension.
        let wl: String = extract_dim(content, " avg_word_len=")
        let wl1 = if str_eq(wl, "1") { wl1 + 1 } else { wl1 }
        let wl2 = if str_eq(wl, "2") { wl2 + 1 } else { wl2 }
        let wl3 = if str_eq(wl, "3") { wl3 + 1 } else { wl3 }

        let ps: String = extract_dim(content, " punct=")
        let ps1 = if str_eq(ps, "1") { ps1 + 1 } else { ps1 }
        let ps2 = if str_eq(ps, "2") { ps2 + 1 } else { ps2 }

        let lb: String = extract_dim(content, " len=")
        let lb1 = if str_eq(lb, "1") { lb1 + 1 } else { lb1 }
        let lb2 = if str_eq(lb, "2") { lb2 + 1 } else { lb2 }
        let lb3 = if str_eq(lb, "3") { lb3 + 1 } else { lb3 }

        let qr: String = extract_dim(content, " question=")
        let qr0 = if str_eq(qr, "0") { qr0 + 1 } else { qr0 }
        let qr1 = if str_eq(qr, "1") { qr1 + 1 } else { qr1 }

        let fs: String = extract_dim(content, " formality=")
        let fs1 = if str_eq(fs, "1") { fs1 + 1 } else { fs1 }
        let fs2 = if str_eq(fs, "2") { fs2 + 1 } else { fs2 }

        let tb: String = extract_dim(content, " time=")
        let tb1 = if str_eq(tb, "1") { tb1 + 1 } else { tb1 }
        let tb2 = if str_eq(tb, "2") { tb2 + 1 } else { tb2 }
        let tb3 = if str_eq(tb, "3") { tb3 + 1 } else { tb3 }
        let tb4 = if str_eq(tb, "4") { tb4 + 1 } else { tb4 }

        let bi = bi + 1
    }

    // Mode for avg_word_len (1, 2, or 3)
    let mode_wl: String = if wl1 >= wl2 && wl1 >= wl3 { "1" } else { if wl2 >= wl3 { "2" } else { "3" } }

    // Mode for punct (1 or 2)
    let mode_ps: String = if ps1 >= ps2 { "1" } else { "2" }

    // Mode for len (1, 2, or 3)
    let mode_lb: String = if lb1 >= lb2 && lb1 >= lb3 { "1" } else { if lb2 >= lb3 { "2" } else { "3" } }

    // Mode for question (0 or 1)
    let mode_qr: String = if qr0 >= qr1 { "0" } else { "1" }

    // Mode for formality (1 or 2)
    let mode_fs: String = if fs1 >= fs2 { "1" } else { "2" }

    // Mode for time (1, 2, 3, or 4): pick the winner of each pair, then compare the two winners.
    let mode_tb_12: String = if tb1 >= tb2 { "1" } else { "2" }
    let mode_tb_34: String = if tb3 >= tb4 { "3" } else { "4" }
    let mode_tb_best12: Int = if str_eq(mode_tb_12, "1") { tb1 } else { tb2 }
    let mode_tb_best34: Int = if str_eq(mode_tb_34, "3") { tb3 } else { tb4 }
    let mode_tb: String = if mode_tb_best12 >= mode_tb_best34 { mode_tb_12 } else { mode_tb_34 }

    let baseline_json: String = "{\"avg_word_len\":\"" + mode_wl + "\",\"punct\":\"" + mode_ps + "\",\"len\":\"" + mode_lb + "\",\"question\":\"" + mode_qr + "\",\"formality\":\"" + mode_fs + "\",\"time\":\"" + mode_tb + "\"}"

    return "{\"baseline\":" + baseline_json + ",\"sample_count\":\"" + int_to_str(total) + "\"}"
}
|
|
|
|
// steward_check_continuity — score the current fingerprint against the stored baseline.
//
// The score is the number of dimensions (0-6) whose value differs from the baseline:
//   no baseline yet -> status "learning",      action "pass"
//   0-1 mismatches  -> status "consistent",    action "pass"
//   2-3 mismatches  -> status "drift",         action "annotate"        (logged: behavior_drift)
//   4-5 mismatches  -> status "discontinuity", action "soft_check"      (logged: continuity_concern)
//   6 mismatches    -> status "anomaly",       action "identity_check"  (logged: identity_anomaly)
// Returns the verdict as a JSON object string.
fn steward_check_continuity(current_fingerprint: String, session_id: String) -> String {
    let baseline: String = json_get(steward_build_baseline(), "baseline")

    // A missing or null baseline means not enough samples have been collected yet.
    let still_learning: Bool = str_eq(baseline, "") || str_eq(baseline, "null")
    if still_learning {
        return "{\"status\":\"learning\",\"message\":\"building baseline\",\"action\":\"pass\"}"
    }

    // One 0/1 flag per dimension: 1 when the current value differs from the baseline value.
    let d_wl: Int = if str_eq(json_get(current_fingerprint, "avg_word_len"), json_get(baseline, "avg_word_len")) { 0 } else { 1 }
    let d_ps: Int = if str_eq(json_get(current_fingerprint, "punct"), json_get(baseline, "punct")) { 0 } else { 1 }
    let d_lb: Int = if str_eq(json_get(current_fingerprint, "len"), json_get(baseline, "len")) { 0 } else { 1 }
    let d_qr: Int = if str_eq(json_get(current_fingerprint, "question"), json_get(baseline, "question")) { 0 } else { 1 }
    let d_fs: Int = if str_eq(json_get(current_fingerprint, "formality"), json_get(baseline, "formality")) { 0 } else { 1 }
    let d_tb: Int = if str_eq(json_get(current_fingerprint, "time"), json_get(baseline, "time")) { 0 } else { 1 }

    let mismatches: Int = d_wl + d_ps + d_lb + d_qr + d_fs + d_tb
    let score_str: String = int_to_str(mismatches)

    if mismatches <= 1 {
        return "{\"status\":\"consistent\",\"score\":\"" + score_str + "\",\"action\":\"pass\"}"
    }

    // Every remaining outcome is logged with the same detail line.
    let detail: String = "session=" + session_id + " mismatches=" + score_str

    if mismatches <= 3 {
        steward_log_event("behavior_drift", detail)
        return "{\"status\":\"drift\",\"score\":\"" + score_str + "\",\"action\":\"annotate\",\"message\":\"behavioral drift detected \\u2014 responding with attentiveness\"}"
    }

    if mismatches <= 5 {
        steward_log_event("continuity_concern", detail)
        return "{\"status\":\"discontinuity\",\"score\":\"" + score_str + "\",\"action\":\"soft_check\",\"message\":\"significant pattern change \\u2014 gentle continuity check appropriate\"}"
    }

    // Only reachable when all six dimensions differ.
    steward_log_event("identity_anomaly", detail)
    return "{\"status\":\"anomaly\",\"score\":\"6\",\"action\":\"identity_check\",\"message\":\"behavioral pattern strongly inconsistent with established profile\"}"
}
|
|
|
|
// steward_session_check — per-turn convenience entry point.
//
// Fingerprints the input (which also stores the sample in engram) and then runs
// the continuity check on that fingerprint, returning the check's JSON verdict.
// The original header says this is called from the composition layer each turn.
fn steward_session_check(input: String, session_id: String) -> String {
    return steward_check_continuity(steward_fingerprint_session(input, session_id), session_id)
}
|