import "memory.el"

// ── Layer 1 — Safety ──────────────────────────────────────────────────────────
//
// Structural role: screens every user input BEFORE it reaches L2/L3, and
// validates every generated output BEFORE it reaches the user.
//
// Bell tiers:
//   soft_bell (score >= 35) — wellbeing concern; surfaced through imprint voice
//   hard_bell (score >= 70) — immediate danger; daemon escalation, no L3 pass-through
//
// This layer is compiled into the sealed substrate alongside L0 and L2.
// It is structurally inaccessible from within an imprint (L3).
//
// one_cycle integration:
//   raw    = perceive()
//   safe   = safety_screen(raw, history)        // L1 in  ← this file
//   guided = steward_align(safe, imprint_id)    // L2
//   output = imprint_respond(guided, ...)       // L3
//   final  = safety_validate(output, action)    // L1 out ← this file
//   respond(final)

// Minimum composite score at which a soft bell (wellbeing concern) is raised.
fn soft_bell_threshold() -> Int {
    let threshold: Int = 35
    return threshold
}

// Minimum composite score at which a hard bell (immediate danger) is raised.
fn hard_bell_threshold() -> Int {
    let threshold: Int = 70
    return threshold
}

// ── Scoring sub-functions ─────────────────────────────────────────────────────
//
// Adapted from awareness.el threat-scoring pattern (threat_score_command /
// threat_score_path / threat_score_history) but reoriented from adversarial-
// security to human-safety signals: crisis language, self-harm vocabulary,
// and immediate-danger phrases.
//
// Each sub-function returns an additive score 0-N. Callers are responsible
// for capping at 100.
// Scores explicit crisis / suicidal-ideation phrases in `input`.
//
// `input` is matched with plain substring checks against lower-case phrases,
// so callers should pass already lower-cased text.
// Returns the sum of the weights of every phrase found (0 when none match).
// The sum is NOT capped here.
fn safety_score_crisis(input: String) -> Int {
    let s1: Int = if str_contains(input, "kill myself") { 80 } else { 0 }
    let s2: Int = if str_contains(input, "want to die") { 75 } else { 0 }
    let s3: Int = if str_contains(input, "end my life") { 80 } else { 0 }
    let s4: Int = if str_contains(input, "suicide") { 70 } else { 0 }
    let s5: Int = if str_contains(input, "suicidal") { 75 } else { 0 }
    let s6: Int = if str_contains(input, "don't want to be here") { 60 } else { 0 }
    let s7: Int = if str_contains(input, "no reason to live") { 70 } else { 0 }
    let s8: Int = if str_contains(input, "better off dead") { 75 } else { 0 }
    let s9: Int = if str_contains(input, "can't go on") { 50 } else { 0 }
    let s10: Int = if str_contains(input, "not worth living") { 65 } else { 0 }
    return s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10
}

// Scores self-harm vocabulary in `input`.
//
// Same contract as safety_score_crisis: substring matching against lower-case
// phrases, additive weights, no cap. "self harm" and "self-harm" are separate
// entries so both spellings are recognised.
fn safety_score_harm(input: String) -> Int {
    let s1: Int = if str_contains(input, "hurt myself") { 60 } else { 0 }
    let s2: Int = if str_contains(input, "cut myself") { 65 } else { 0 }
    let s3: Int = if str_contains(input, "self harm") { 60 } else { 0 }
    let s4: Int = if str_contains(input, "self-harm") { 60 } else { 0 }
    let s5: Int = if str_contains(input, "overdose") { 65 } else { 0 }
    let s6: Int = if str_contains(input, "take all my pills") { 75 } else { 0 }
    let s7: Int = if str_contains(input, "starving myself") { 50 } else { 0 }
    let s8: Int = if str_contains(input, "burning myself") { 60 } else { 0 }
    let s9: Int = if str_contains(input, "punish myself") { 40 } else { 0 }
    let s10: Int = if str_contains(input, "deserve to suffer") { 45 } else { 0 }
    return s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10
}

// Scores immediate-danger phrases in `input` (threats, abuse, emergencies).
//
// Same contract as safety_score_crisis: substring matching against lower-case
// phrases, additive weights, no cap. Two signals require a pair of phrases to
// co-occur ("help me" + "emergency", "trapped" + "can't escape").
fn safety_score_danger(input: String) -> Int {
    let s1: Int = if str_contains(input, "help me") && str_contains(input, "emergency") { 55 } else { 0 }
    let s2: Int = if str_contains(input, "call 911") { 50 } else { 0 }
    let s3: Int = if str_contains(input, "call an ambulance") { 55 } else { 0 }
    let s4: Int = if str_contains(input, "in danger") { 50 } else { 0 }
    let s5: Int = if str_contains(input, "someone is threatening") { 60 } else { 0 }
    let s6: Int = if str_contains(input, "being abused") { 55 } else { 0 }
    let s7: Int = if str_contains(input, "domestic violence") { 55 } else { 0 }
    let s8: Int = if str_contains(input, "trapped") && str_contains(input, "can't escape") { 60 } else { 0 }

    // "she is going to hurt" contains "he is going to hurt" as a substring.
    // Scoring the two phrases independently counted the "she" wording twice
    // (65 + 65), so the same threat crossed the hard-bell threshold for one
    // pronoun and only the soft-bell threshold for the other. They are one
    // signal: score 65 exactly once when either wording is present.
    let s9: Int = if str_contains(input, "she is going to hurt") { 65 } else {
        if str_contains(input, "he is going to hurt") { 65 } else { 0 }
    }

    return s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9
}

// Scores lower-intensity distress markers in the conversation `history`.
//
// Same matching rules as the input scorers (lower-case substring checks,
// additive weights, no cap). safety_threat_score weights this result down
// before adding it to the input score.
fn safety_score_distress_history(history: String) -> Int {
    let s1: Int = if str_contains(history, "hopeless") { 15 } else { 0 }
    let s2: Int = if str_contains(history, "worthless") { 15 } else { 0 }
    let s3: Int = if str_contains(history, "nobody cares") { 15 } else { 0 }
    let s4: Int = if str_contains(history, "no one cares") { 15 } else { 0 }
    let s5: Int = if str_contains(history, "completely alone") { 15 } else { 0 }
    let s6: Int = if str_contains(history, "all alone") { 10 } else { 0 }
    let s7: Int = if str_contains(history, "can't take it anymore") { 20 } else { 0 }
    let s8: Int = if str_contains(history, "want to disappear") { 20 } else { 0 }
    let s9: Int = if str_contains(history, "don't care anymore") { 15 } else { 0 }
    let s10: Int = if str_contains(history, "giving up") { 15 } else { 0 }
    return s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10
}

// ── safety_threat_score ───────────────────────────────────────────────────────
//
// Composite score 0-100.
// Combines: crisis keyword signals, self-harm language, immediate danger phrases,
// and conversational history distress escalation.
// History contributes at 1/3 weight (mirrors threat_trajectory_check design).
// Computes the composite safety score for one turn.
//
//   input   — the raw user message for this turn
//   history — prior conversation text
//
// Returns an Int no greater than 100: the largest of the crisis / harm /
// danger scores for `input`, plus one third (integer division) of the
// distress score for `history`.
fn safety_threat_score(input: String, history: String) -> Int {
    // Normalise before matching. The scoring phrases are lower-case and use the
    // ASCII apostrophe ("can't go on", "don't want to be here", ...). Many
    // keyboards emit typographic apostrophes (U+2019 / U+2018) instead, which
    // would otherwise make those phrases silently fail to match.
    let input_lower: String = str_to_lower(input)
    let input_q: String = str_replace(input_lower, "’", "'")
    let input_norm: String = str_replace(input_q, "‘", "'")

    let history_lower: String = str_to_lower(history)
    let history_q: String = str_replace(history_lower, "’", "'")
    let history_norm: String = str_replace(history_q, "‘", "'")

    let crisis: Int = safety_score_crisis(input_norm)
    let harm: Int = safety_score_harm(input_norm)
    let danger: Int = safety_score_danger(input_norm)
    let hist: Int = safety_score_distress_history(history_norm)

    // Take the dominant signal from the three input dimensions, add history at 1/3.
    // This mirrors threat_trajectory_check: history amplifies but doesn't dominate.
    let crisis_or_harm: Int = if crisis > harm { crisis } else { harm }
    let input_score: Int = if crisis_or_harm > danger { crisis_or_harm } else { danger }

    let hist_contrib: Int = hist / 3
    let raw: Int = input_score + hist_contrib

    // Cap at 100: the sub-scorers are additive and can exceed it.
    let score: Int = if raw > 100 { 100 } else { raw }
    return score
}

// ── safety_screen — L1 input gate ─────────────────────────────────────────────
//
// Every user input passes through this before reaching L2/L3.
// Returns a JSON envelope consumed by steward_align (L2).
//
// Return shapes:
//   {"action":"hard_bell","reason":"immediate safety concern","content":""}
//   {"action":"soft_bell","reason":"wellbeing check needed","content":"<JSON-escaped input>"}
//   {"action":"pass","content":"<JSON-escaped input>"}
//
// On hard_bell the content is deliberately empty: the input is not forwarded.
// Both bell tiers also record a BellEvent via safety_log_bell, carrying the
// result of str_slice(input, 0, 80) as the summary.
fn safety_screen(input: String, history: String) -> String {
    let score: Int = safety_threat_score(input, history)
    let hard: Int = hard_bell_threshold()
    let soft: Int = soft_bell_threshold()

    if score >= hard {
        let summary: String = str_slice(input, 0, 80)
        let discard: String = safety_log_bell("hard", "immediate safety concern", summary)
        return "{\"action\":\"hard_bell\",\"reason\":\"immediate safety concern\",\"content\":\"\"}"
    }

    // Both remaining shapes embed the user text, so escape it once up front.
    let safe_input: String = safety_json_escape(input)

    if score >= soft {
        let summary: String = str_slice(input, 0, 80)
        let discard: String = safety_log_bell("soft", "wellbeing check needed", summary)
        return "{\"action\":\"soft_bell\",\"reason\":\"wellbeing check needed\",\"content\":\"" + safe_input + "\"}"
    }

    return "{\"action\":\"pass\",\"content\":\"" + safe_input + "\"}"
}

// Escapes `text` for embedding inside a double-quoted JSON string.
//
// Backslash is replaced first so that the backslashes introduced by the later
// replacements are not themselves doubled. TAB is escaped in addition to
// quote / newline / carriage return, because a raw tab inside a JSON string is
// invalid and would break the envelope for its consumer.
// NOTE(review): other control characters below U+0020 are still passed through
// unescaped — confirm whether upstream input can contain them.
fn safety_json_escape(text: String) -> String {
    let e1: String = str_replace(text, "\\", "\\\\")
    let e2: String = str_replace(e1, "\"", "\\\"")
    let e3: String = str_replace(e2, "\n", "\\n")
    let e4: String = str_replace(e3, "\r", "\\r")
    let e5: String = str_replace(e4, "\t", "\\t")
    return e5
}

// ── safety_validate — L1 output gate ──────────────────────────────────────────
//
// Every generated output passes through this before reaching the user.
// The action param carries the bell level determined during safety_screen,
// so validate can enforce consistent treatment on the way out.
//
//   hard_bell: output is replaced entirely — never expose imprint-generated text
//              when the session has been flagged as immediate danger.
//   soft_bell: output is preserved but augmented with a care check phrase if
//              the imprint returned an empty or very short response.
//   pass:      output returned verbatim.
// Applies the outbound safety policy to a generated `output`.
//
//   output — text produced for the user
//   action — bell level string: "hard_bell", "soft_bell", or anything else
//
// Returns the text that should actually be shown to the user:
//   - "hard_bell": a fixed crisis-resource message; `output` is ignored.
//   - "soft_bell": `output`, with a care-check sentence appended when
//     str_len(output) is below 20; the sentence alone when `output` is empty.
//   - any other action: `output` unchanged.
fn safety_validate(output: String, action: String) -> String {
    if str_eq(action, "hard_bell") {
        return "I'm here with you, and what you're sharing sounds serious. Please reach out to a crisis line now — in the US you can call or text 988 (Suicide and Crisis Lifeline), available 24/7. You don't have to go through this alone."
    }

    if str_eq(action, "soft_bell") {
        let care_phrase: String = "I'm here if you want to talk more about how you're feeling."
        let out_len: Int = str_len(output)

        // Empty output: return the phrase on its own. Appending " " + phrase to
        // an empty string would leave a stray leading space in the reply.
        if out_len < 1 {
            return care_phrase
        }

        let too_short: Bool = out_len < 20
        if too_short {
            return output + " " + care_phrase
        }
        return output
    }

    return output
}

// ── safety_log_bell ───────────────────────────────────────────────────────────
//
// Writes a BellEvent node to engram for audit and continuity.
// Never surfaces to the user; consumed by daemon observability layer.
//
//   level         — bell tier label; safety_screen passes "hard" or "soft"
//   reason        — short human-readable reason string
//   input_summary — summary of the triggering input, stored in the node content
//
// Always returns "" — callers bind the result only to discard it.
// NOTE(review): the three el_from_float arguments and the "Episodic" argument
// are passed straight to engram_node_full; their meaning is defined by that
// function's signature — confirm against memory.el.
fn safety_log_bell(level: String, reason: String, input_summary: String) -> String {
    let content: String = "BELL:" + level + " | " + reason + " | summary:" + input_summary
    let tags: String = "[\"safety\",\"bell\",\"bell:" + level + "\"]"
    let discard: String = engram_node_full(
        content,
        "BellEvent",
        "bell:" + level,
        el_from_float(0.95),
        el_from_float(0.95),
        el_from_float(1.0),
        "Episodic",
        tags
    )
    return ""
}