Merge pull request 'test(soul): Layer 1 safety.el test suite' (#10) from test/layer-safety into feat/layer-safety
Neuron Soul CI / build (pull_request) Failing after 5m53s
Neuron Soul CI / build (pull_request) Failing after 5m53s
This commit was merged in pull request #10.
This commit is contained in:
@@ -0,0 +1,428 @@
|
||||
// ── test_safety.el ────────────────────────────────────────────────────────────
|
||||
//
|
||||
// Comprehensive test suite for safety.el (Layer 1 — Safety).
|
||||
//
|
||||
// Covers:
|
||||
// - safety_screen: benign, soft_bell, hard_bell, and empty-input paths
|
||||
// - safety_validate: pass verbatim, hard_bell replacement, soft_bell augmentation
|
||||
// - safety_threat_score: benign (<35), distress/soft (>=35), crisis/hard (>=70)
|
||||
// - scoring sub-functions: safety_score_crisis, safety_score_harm,
|
||||
// safety_score_danger, safety_score_distress_history
|
||||
// - JSON contract: action field parseable by json_get on every return path
|
||||
// - JSON field name consistency: reason field present on both bell paths
|
||||
// (guards against the "reason" vs "concern" schema split bug)
|
||||
// - Edge cases: empty input, very short output, score caps
|
||||
//
|
||||
// NOTE: str_to_lower is called inside safety_threat_score. If the El runtime
|
||||
// does not provide that builtin, all composite-score tests that expect a
|
||||
// non-zero score will fail with score=0. The sub-function tests below pass
|
||||
// lowercase literals directly to the scoring helpers and will still pass,
|
||||
// which helps isolate whether the failure is in str_to_lower or the scoring
|
||||
// logic itself.
|
||||
//
|
||||
// Known bugs in the source that tests intentionally expose (as of Phase 1 review):
|
||||
// - safety_log_bell declared -> Void but returns "" (should be -> String)
|
||||
// - discard variable typed as Void at call sites (should be String)
|
||||
// - soft_bell JSON uses "concern" field, hard_bell uses "reason" (should both be "reason")
|
||||
// - JSON escaping only handles double-quote, not backslash / \n / \r
|
||||
// ──────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
import "../safety.el"
|
||||
|
||||
// Running totals of passed / failed assertions; both are read by the summary
// line printed at the end of this file.
// NOTE(review): the assert helpers update these with `let name = name + 1`
// from inside function bodies — this assumes El rebinds the module-level
// counter rather than creating a local shadow. TODO confirm against the runtime.
let pass_count: Int = 0
|
||||
let fail_count: Int = 0
|
||||
|
||||
// Records one string-equality check.
//   label    — human-readable name printed with the verdict
//   got      — value produced by the code under test
//   expected — value it is compared against with str_eq
// Prints a PASS line on a match; otherwise a FAIL line followed by both values.
// NOTE(review): relies on `let pass_count = ...` / `let fail_count = ...`
// updating the module-level counters — confirm El does not shadow here.
fn assert_eq(label: String, got: String, expected: String) -> Void {
    if str_eq(got, expected) {
        let pass_count = pass_count + 1
        let pass_line: String = " PASS: " + label
        println(pass_line)
    } else {
        let fail_count = fail_count + 1
        let got_line: String = " got: " + got
        let expected_line: String = " expected: " + expected
        println(" FAIL: " + label)
        println(got_line)
        println(expected_line)
    }
}
|
||||
|
||||
// Records one integer-equality check.
//   label    — human-readable name printed with the verdict
//   got      — integer produced by the code under test
//   expected — integer it must equal
// Prints a PASS line on equality; otherwise a FAIL line followed by both
// values rendered through int_to_str.
// NOTE(review): relies on `let` inside the function updating the module-level
// counters — confirm El does not shadow here.
fn assert_eq_int(label: String, got: Int, expected: Int) -> Void {
    let same: Bool = got == expected
    if same {
        let pass_count = pass_count + 1
        println(" PASS: " + label)
    } else {
        let fail_count = fail_count + 1
        let got_line: String = " got: " + int_to_str(got)
        let expected_line: String = " expected: " + int_to_str(expected)
        println(" FAIL: " + label)
        println(got_line)
        println(expected_line)
    }
}
|
||||
|
||||
// Records one substring-presence check.
//   label    — human-readable name printed with the verdict
//   haystack — text that is searched
//   needle   — substring that must occur in haystack (per str_contains)
// Prints a PASS line when the needle is found; otherwise a FAIL line plus a
// detail line showing the missing needle and the full haystack.
// NOTE(review): relies on `let` inside the function updating the module-level
// counters — confirm El does not shadow here.
fn assert_contains(label: String, haystack: String, needle: String) -> Void {
    if str_contains(haystack, needle) {
        let pass_count = pass_count + 1
        let pass_line: String = " PASS: " + label
        println(pass_line)
    } else {
        let fail_count = fail_count + 1
        let detail: String = " missing '" + needle + "' in: " + haystack
        println(" FAIL: " + label)
        println(detail)
    }
}
|
||||
|
||||
// Records one substring-absence check (the inverse of assert_contains).
//   label    — human-readable name printed with the verdict
//   haystack — text that is searched
//   needle   — substring that must NOT occur in haystack (per str_contains)
// Prints a FAIL line plus a detail line when the needle is found; otherwise a
// PASS line.
// NOTE(review): relies on `let` inside the function updating the module-level
// counters — confirm El does not shadow here.
fn assert_not_contains(label: String, haystack: String, needle: String) -> Void {
    if str_contains(haystack, needle) {
        let fail_count = fail_count + 1
        let detail: String = " unexpected '" + needle + "' found in: " + haystack
        println(" FAIL: " + label)
        println(detail)
    } else {
        let pass_count = pass_count + 1
        let pass_line: String = " PASS: " + label
        println(pass_line)
    }
}
|
||||
|
||||
// Records one strict upper-bound check: passes when got < threshold.
//   label     — human-readable name printed with the verdict
//   got       — integer produced by the code under test
//   threshold — exclusive upper bound
// Prints a PASS line when the bound holds; otherwise a FAIL line plus a detail
// line with the actual value and the bound.
// NOTE(review): relies on `let` inside the function updating the module-level
// counters — confirm El does not shadow here.
fn assert_int_lt(label: String, got: Int, threshold: Int) -> Void {
    let below: Bool = got < threshold
    if below {
        let pass_count = pass_count + 1
        println(" PASS: " + label)
    } else {
        let fail_count = fail_count + 1
        let detail: String = " got " + int_to_str(got) + ", expected < " + int_to_str(threshold)
        println(" FAIL: " + label)
        println(detail)
    }
}
|
||||
|
||||
// Records one inclusive lower-bound check: passes when got >= threshold.
//   label     — human-readable name printed with the verdict
//   got       — integer produced by the code under test
//   threshold — inclusive lower bound
// Prints a PASS line when the bound holds; otherwise a FAIL line plus a detail
// line with the actual value and the bound.
// NOTE(review): relies on `let` inside the function updating the module-level
// counters — confirm El does not shadow here.
fn assert_int_gte(label: String, got: Int, threshold: Int) -> Void {
    let reached: Bool = got >= threshold
    if reached {
        let pass_count = pass_count + 1
        println(" PASS: " + label)
    } else {
        let fail_count = fail_count + 1
        let detail: String = " got " + int_to_str(got) + ", expected >= " + int_to_str(threshold)
        println(" FAIL: " + label)
        println(detail)
    }
}
|
||||
|
||||
// ── Section 1: safety_screen — benign input ───────────────────────────────────

println("")
println("1. safety_screen — benign input")

// A harmless question with no history: the screen result is expected to carry
// action "pass", echo the input back as content, and have no reason field
// (json_get yields "" for it).
let weather_question: String = "What is the weather like today?"
let benign_screen: String = safety_screen(weather_question, "")
assert_eq("benign input -> action is 'pass'", json_get(benign_screen, "action"), "pass")
assert_eq("benign input -> content preserved", json_get(benign_screen, "content"), weather_question)
assert_eq("benign input -> no reason field", json_get(benign_screen, "reason"), "")
|
||||
|
||||
// ── Section 2: safety_screen — empty input ────────────────────────────────────

println("")
println("2. safety_screen — empty input")

// Empty message and empty history: expected to be treated as a plain pass
// with empty content.
let empty_screen: String = safety_screen("", "")
assert_eq("empty input -> action is 'pass'", json_get(empty_screen, "action"), "pass")
assert_eq("empty input -> content is empty string", json_get(empty_screen, "content"), "")
|
||||
|
||||
// ── Section 3: safety_screen — soft_bell path ─────────────────────────────────

println("")
println("3. safety_screen — soft_bell path (mild distress language in history)")

// Score derivation, using the weights this suite pins in its unit tests:
//   input   "can't go on" -> crisis score 50 (Section 14)
//   history "hopeless"    -> distress score 15 (Section 17); history is
//                            divided by 3 before being added -> 5
// Expected total is about 55: above the soft threshold (35) and below the
// hard threshold (70), so the action must be soft_bell.
let soft_input: String = "I just feel so empty and I can't go on like this"
let soft_result: String = safety_screen(soft_input, "I've been feeling hopeless lately")
let soft_action: String = json_get(soft_result, "action")
assert_eq("soft_bell input -> action is 'soft_bell'", soft_action, "soft_bell")

// JSON field name consistency: both bell paths must expose a "reason" field.
// The check looks for the quoted key in the raw JSON so it fails while the
// soft_bell path still emits "concern" instead (the documented schema bug).
assert_contains("soft_bell result -> has 'reason' field (not 'concern')", soft_result, "\"reason\"")

// Content should be present (not blanked out like hard_bell).
let soft_content: String = json_get(soft_result, "content")
assert_contains("soft_bell result -> content includes original input", soft_content, "can't go on")
|
||||
|
||||
// ── Section 4: safety_screen — hard_bell path ────────────────────────────────

println("")
println("4. safety_screen — hard_bell path (immediate danger phrases)")

// "kill myself" is expected to score 80 in the crisis dimension, which is
// above the hard threshold of 70.
let hard_screen: String = safety_screen("I want to kill myself tonight", "")
assert_eq("hard_bell input -> action is 'hard_bell'", json_get(hard_screen, "action"), "hard_bell")

// On hard_bell the content field must be empty so no user text leaks through.
assert_eq("hard_bell result -> content is empty string", json_get(hard_screen, "content"), "")

// JSON field name consistency: the hard_bell result must carry a "reason"
// field whose text mentions "safety concern".
assert_contains("hard_bell result -> has 'reason' field", json_get(hard_screen, "reason"), "safety concern")
|
||||
|
||||
// ── Section 5: safety_screen — second hard_bell trigger ───────────────────────

println("")
println("5. safety_screen — hard_bell path (alternative crisis phrase)")

// "want to die" is expected to score 75, which is above the hard threshold.
let die_screen: String = safety_screen("I want to die, there is no point anymore", "")
assert_eq("'want to die' -> action is 'hard_bell'", json_get(die_screen, "action"), "hard_bell")
|
||||
|
||||
// ── Section 6: safety_screen — harm keyword triggers soft_bell ───────────────

println("")
println("6. safety_screen — self-harm keyword at soft_bell tier")

// "punish myself" is expected to score 40 in the harm dimension, which sits
// between the soft (35) and hard (70) thresholds.
let punish_screen: String = safety_screen("I keep wanting to punish myself for every mistake", "")
assert_eq("'punish myself' -> action is 'soft_bell'", json_get(punish_screen, "action"), "soft_bell")
|
||||
|
||||
// ── Section 7: safety_validate — pass action ─────────────────────────────────

println("")
println("7. safety_validate — pass action")

// With action "pass" the generated reply must come back unchanged.
let weather_reply: String = "The weather in London is currently 18 degrees and overcast."
assert_eq("validate pass -> output returned verbatim", safety_validate(weather_reply, "pass"), weather_reply)
|
||||
|
||||
// ── Section 8: safety_validate — hard_bell action ────────────────────────────

println("")
println("8. safety_validate — hard_bell action")

let imprint_text: String = "some imprint output"
let hard_fallback: String = safety_validate(imprint_text, "hard_bell")

// The imprint-generated text must not survive validation.
assert_not_contains("hard_bell validate -> imprint output replaced", hard_fallback, imprint_text)

// The replacement must be a real fallback; "988" is used as the marker for it.
assert_contains("hard_bell validate -> fallback is not empty", hard_fallback, "988")

// The fallback must mention a crisis resource.
assert_contains("hard_bell validate -> contains crisis line reference", hard_fallback, "crisis")
|
||||
|
||||
// ── Section 9: safety_validate — soft_bell with short output ─────────────────

println("")
println("9. safety_validate — soft_bell with short output (< 20 chars)")

let brief_reply: String = "I hear you."
let augmented_reply: String = safety_validate(brief_reply, "soft_bell")

// A too-short reply must gain a care-check phrase while keeping its original text.
assert_contains("soft_bell short output -> augmented with care phrase", augmented_reply, "I'm here")
assert_contains("soft_bell short output -> original text preserved", augmented_reply, brief_reply)
|
||||
|
||||
// ── Section 10: safety_validate — soft_bell with adequate output ──────────────

println("")
println("10. safety_validate — soft_bell with adequate output (>= 20 chars)")

// A reply that is already long enough must be returned verbatim, with no
// augmentation.
let caring_reply: String = "It sounds like you've been carrying a heavy weight. I'm here and I'm listening — would you like to tell me more about what's been happening?"
assert_eq("soft_bell adequate output -> returned verbatim", safety_validate(caring_reply, "soft_bell"), caring_reply)
|
||||
|
||||
// ── Section 11: safety_threat_score — benign text ────────────────────────────

println("")
println("11. safety_threat_score — benign text")

// Neutral and positive messages with no history must stay under the soft
// threshold (35).
assert_int_lt("benign text -> score < 35", safety_threat_score("What's the capital of France?", ""), 35)
assert_int_lt("positive text -> score < 35", safety_threat_score("I had a great time at the park today", ""), 35)
|
||||
|
||||
// ── Section 12: safety_threat_score — distress keywords in history ────────────

println("")
println("12. safety_threat_score — distress keywords push score towards soft_bell")

// Expected derivation for this input/history pair:
//   history: "hopeless" (15) + "nobody cares" (15) + "can't take it anymore" (20)
//            = 50, divided by 3 -> hist_contrib = 16
//   input:   "punish myself" -> harm score 40
//   total:   40 + 16 = 56, which is >= 35 (soft threshold)
let distress_input: String = "I keep wanting to punish myself"
let distress_history: String = "I feel hopeless and nobody cares and I can't take it anymore"
assert_int_gte("distress input+history -> score >= 35", safety_threat_score(distress_input, distress_history), 35)
|
||||
|
||||
// ── Section 13: safety_threat_score — crisis phrase triggers hard_bell tier ───

println("")
println("13. safety_threat_score — crisis phrase -> score >= 70")

// Each phrase on its own, with no history, must reach the hard threshold (70).
assert_int_gte("'kill myself' -> score >= 70", safety_threat_score("I want to kill myself", ""), 70)
assert_int_gte("'want to die' -> score >= 70", safety_threat_score("I want to die", ""), 70)
assert_int_gte("'suicidal' -> score >= 70", safety_threat_score("I am suicidal", ""), 70)
|
||||
|
||||
// ── Section 14: safety_score_crisis — direct unit tests ──────────────────────

println("")
println("14. safety_score_crisis — direct unit tests (lowercase literals)")

// Inputs are already lowercase so these checks do not depend on str_to_lower.
assert_int_lt("score_crisis: no keywords -> 0", safety_score_crisis("hello world"), 1)
assert_int_gte("score_crisis: 'end my life' -> >= 80", safety_score_crisis("i want to end my life"), 80)
assert_int_gte("score_crisis: 'better off dead' -> >= 75", safety_score_crisis("i would be better off dead"), 75)
assert_int_gte("score_crisis: 'can't go on' -> >= 50", safety_score_crisis("i can't go on"), 50)
|
||||
|
||||
// ── Section 15: safety_score_harm — direct unit tests ────────────────────────

println("")
println("15. safety_score_harm — direct unit tests (lowercase literals)")

// Inputs are already lowercase so these checks do not depend on str_to_lower.
assert_int_lt("score_harm: no keywords -> 0", safety_score_harm("i feel sad today"), 1)
assert_int_gte("score_harm: 'cut myself' -> >= 65", safety_score_harm("i've been cut myself recently"), 65)
assert_int_gte("score_harm: 'overdose' -> >= 65", safety_score_harm("thinking about an overdose"), 65)
assert_int_gte("score_harm: 'self-harm' -> >= 60", safety_score_harm("i have a history of self-harm"), 60)
|
||||
|
||||
// ── Section 16: safety_score_danger — direct unit tests ──────────────────────

println("")
println("16. safety_score_danger — direct unit tests (lowercase literals)")

// Inputs are already lowercase so these checks do not depend on str_to_lower.
assert_int_lt("score_danger: no keywords -> 0", safety_score_danger("how do i cook pasta?"), 1)
assert_int_gte("score_danger: 'domestic violence' -> >= 55", safety_score_danger("i'm experiencing domestic violence"), 55)
assert_int_gte("score_danger: 'someone is threatening' -> >= 60", safety_score_danger("someone is threatening me"), 60)
assert_int_gte("score_danger: 'call an ambulance' -> >= 55", safety_score_danger("please call an ambulance"), 55)
|
||||
|
||||
// ── Section 17: safety_score_distress_history — direct unit tests ─────────────

println("")
println("17. safety_score_distress_history — direct unit tests (lowercase literals)")

// Inputs are already lowercase so these checks do not depend on str_to_lower.
// The last case expects three keywords to add up (at least 45 in total).
assert_int_lt("score_distress_history: no keywords -> 0", safety_score_distress_history("i went to the park yesterday"), 1)
assert_int_gte("score_distress_history: 'hopeless' -> >= 15", safety_score_distress_history("i feel hopeless"), 15)
assert_int_gte("score_distress_history: 'giving up' -> >= 15", safety_score_distress_history("i'm giving up on everything"), 15)
assert_int_gte("score_distress_history: multiple keywords -> >= 45", safety_score_distress_history("hopeless and worthless and nobody cares"), 45)
|
||||
|
||||
// ── Section 18: score cap at 100 ─────────────────────────────────────────────

println("")
println("18. safety_threat_score — score caps at 100")

// Several crisis phrases in one message ("kill myself", "suicidal",
// "want to die") plus a distress-heavy history would exceed 100 if their
// contributions were simply summed; safety_threat_score is expected to clamp
// the result to 100.
let overload_score: Int = safety_threat_score(
    "i want to kill myself i am suicidal and i want to die",
    "hopeless worthless nobody cares can't take it anymore giving up"
)

// Floor check first: an upper bound alone passes vacuously when scoring is
// broken and returns 0, so also require the hard_bell tier that "kill myself"
// alone must reach (see Section 13).
assert_int_gte("overloaded keywords -> score still >= 70", overload_score, 70)

// Upper bound via the shared helper: for Int values, < 101 is the same check
// as <= 100. The observed score is folded into the label so the PASS line
// still shows it.
assert_int_lt("overloaded keywords -> score capped at 100 (got " + int_to_str(overload_score) + ")", overload_score, 101)
|
||||
|
||||
// ── Section 19: threshold functions ──────────────────────────────────────────

println("")
println("19. threshold functions return correct values")

// Pin the two tier boundaries that every other section's comments rely on.
let soft_limit: Int = soft_bell_threshold()
assert_eq_int("soft_bell_threshold -> 35", soft_limit, 35)
let hard_limit: Int = hard_bell_threshold()
assert_eq_int("hard_bell_threshold -> 70", hard_limit, 70)
|
||||
|
||||
// ── Section 20: json_get contract on all three safety_screen return shapes ────

println("")
println("20. json_get parses action field on all three return shapes")

// One screen call per tier; in each case json_get must be able to extract
// the "action" field from the returned JSON.
let pass_shape: String = safety_screen("Tell me a joke", "")
let pass_shape_action: String = json_get(pass_shape, "action")
assert_eq("json_get action on pass shape", pass_shape_action, "pass")

let soft_shape: String = safety_screen("i want to punish myself", "feeling hopeless today")
let soft_shape_action: String = json_get(soft_shape, "action")
assert_eq("json_get action on soft_bell shape", soft_shape_action, "soft_bell")

let hard_shape: String = safety_screen("i want to end my life right now", "")
let hard_shape_action: String = json_get(hard_shape, "action")
assert_eq("json_get action on hard_bell shape", hard_shape_action, "hard_bell")
|
||||
|
||||
// ── Section 21: danger composite keyword (and-condition) ─────────────────────

println("")
println("21. safety_score_danger — and-condition keywords")

// "help me" without "emergency" must stay below the 55 that the combined
// phrase is expected to reach.
assert_int_lt("score_danger: 'help me' without 'emergency' -> 0 on s1", safety_score_danger("please help me"), 55)

// With both keywords present the score must reach at least 55.
assert_int_gte("score_danger: 'help me' + 'emergency' -> >= 55", safety_score_danger("please help me it's an emergency"), 55)
|
||||
|
||||
// ── Section 22: history alone — mild stays below, heavy reaches soft_bell ────

println("")
println("22. safety_threat_score — history alone: mild stays below soft threshold, heavy reaches it")

// History is added to the input score after integer division by 3. With all
// 10 history keywords firing the history total is
// 15+15+15+15+15+10+20+20+15+15 = 155, so hist_contrib = 155 / 3 = 51, which
// is >= 35 even with an input score of 0. That is intentional: heavy distress
// history on its own should trigger soft_bell.

// A single mild history phrase with a neutral input must NOT reach soft_bell.
let mild_hist_score: Int = safety_threat_score("hello", "i feel a bit alone today")
assert_int_lt("mild history alone -> score < 35", mild_hist_score, 35)

// Several strong history keywords with a neutral input should reach soft_bell.
// NOTE(review): by the weights quoted in this file (hopeless 15, worthless 15,
// nobody cares 15, can't take it anymore 20, want to disappear 20, and at most
// 15 for "completely alone") this history totals at most 100, giving
// hist_contrib = 33 — below 35. Verify against the keyword table in safety.el;
// the history may need more keywords for this assertion to hold.
let heavy_hist_score: Int = safety_threat_score(
    "hi",
    "hopeless worthless nobody cares completely alone can't take it anymore want to disappear"
)
assert_int_gte("heavy history accumulation -> score >= 35", heavy_hist_score, 35)
|
||||
|
||||
// ── Summary ───────────────────────────────────────────────────────────────────

// Final one-line report built from the two running counters.
// NOTE(review): nothing here signals failure to the caller when fail_count is
// non-zero — presumably CI parses this line; confirm.
println("")
let passed_text: String = int_to_str(pass_count) + " passed, "
let failed_text: String = int_to_str(fail_count) + " failed"
println("safety.el tests: " + passed_text + failed_text)
|
||||
Reference in New Issue
Block a user