Fix five latent bugs from temporal-precision code review
1. parse_salience_100: handle 3+ decimal digit salience strings correctly.
The two-branch 'else { stripped }' case treated every value with two or more
decimal digits as hundredths, so "0.125" (stripped=125) was clamped to 100 instead
of yielding 12. Now divides by 10^(N-2) for N>2 (integer division, so the result is
truncated), mapping "0.125"->12, "0.375"->37, etc.
2. mem_consolidate Canonical scan: replaced single engram_scan_nodes_json(50,0)
call with a paginated loop (page_size=50, advancing offset) so Canonical nodes
beyond index 50 are no longer silently excluded from the periodic boost.
3. mem_consolidate Canonical strengthening: add salience ceiling guard so nodes
already at the runtime maximum (serialised as "1" by %g) are skipped. Prevents
monotonic unbounded salience growth across successive consolidation passes.
4. soul.el affective cutoff: replaced json_get(aff_node, "ts") with
json_get(aff_node, "created_at"), falling back to "updated_at", consistent with
handle_chat. "ts" is not a standard engram node field; when it was missing, the
code fell back to ts_now, which always passes the cutoff and so over-included stale
nodes. The new behaviour defaults to 0 on missing timestamps (conservative exclude).
5. History byte-cap: implemented the existing TODO 32KB byte-cap. Added
hist_trim_to_byte_cap() and applied it after count-based trim in both
handle_chat and handle_chat_agentic. Prevents 100KB+ state entries at 40 turns
during long technical sessions with large assistant responses.
This commit is contained in:
@@ -16,20 +16,28 @@ fn chat_default_model() -> String {
|
||||
// %g format) to an integer in the range 0..100.
|
||||
//
|
||||
// The runtime serializes floats with %g which drops trailing zeros:
|
||||
// 1.0 -> "1" (no decimal at all)
|
||||
// 0.9 -> "0.9" (one decimal digit)
|
||||
// 0.85 -> "0.85" (two decimal digits)
|
||||
// 1.0 -> "1" (no decimal at all)
|
||||
// 0.9 -> "0.9" (one decimal digit)
|
||||
// 0.85 -> "0.85" (two decimal digits)
|
||||
// 0.125 -> "0.125" (three decimal digits — %g does not round to 2 dp)
|
||||
//
|
||||
// The old approach of str_replace(s, ".", "") then str_to_int was broken:
|
||||
// "0.9" -> "09" -> str_to_int -> 9 (should be 90)
|
||||
// "0.5" -> "05" -> str_to_int -> 5 (should be 50)
|
||||
// "1" -> "1" -> str_to_int -> 1 (should be 100)
|
||||
// "0.85" -> "085" -> str_to_int -> 85 (accidentally correct)
|
||||
// "0.9" -> "09" -> str_to_int -> 9 (should be 90)
|
||||
// "0.5" -> "05" -> str_to_int -> 5 (should be 50)
|
||||
// "1" -> "1" -> str_to_int -> 1 (should be 100)
|
||||
// "0.85" -> "085" -> str_to_int -> 85 (accidentally correct)
|
||||
// "0.125" -> "0125" -> str_to_int -> 125 -> clamped to 100 (wrong: should be 12)
|
||||
//
|
||||
// Fix: detect presence and position of the decimal point, then scale accordingly.
|
||||
// - No decimal (e.g. "1"): multiply by 100.
|
||||
// - One decimal digit (e.g. "0.9"): multiply by 10 to get 90.
|
||||
// - Two+ decimal digits (e.g. "0.85"): use as-is (already hundredths).
|
||||
// - Two decimal digits (e.g. "0.85"): use as-is (already hundredths).
|
||||
// - Three+ decimal digits: stripped integer is the value scaled by 10^N (where N=digits
|
||||
// after the dot), so divide by 10^(N-2) to reduce to hundredths (truncating). Examples:
|
||||
// "0.125" -> stripped=125, N=3 -> 125/10 = 12
|
||||
// "0.375" -> stripped=375, N=3 -> 375/10 = 37
|
||||
// "0.625" -> stripped=625, N=3 -> 625/10 = 62
|
||||
// "0.875" -> stripped=875, N=3 -> 875/10 = 87
|
||||
fn parse_salience_100(s: String) -> Int {
|
||||
if str_eq(s, "") { return 70 }
|
||||
let dot_pos: Int = str_index_of(s, ".")
|
||||
@@ -40,7 +48,25 @@ fn parse_salience_100(s: String) -> Int {
|
||||
let after_dot: String = str_slice(s, dot_pos + 1, str_len(s))
|
||||
let decimal_digits: Int = str_len(after_dot)
|
||||
let stripped: Int = str_to_int(str_replace(s, ".", ""))
|
||||
if decimal_digits == 1 { stripped * 10 } else { stripped }
|
||||
if decimal_digits == 1 {
|
||||
stripped * 10
|
||||
} else {
|
||||
if decimal_digits == 2 {
|
||||
stripped
|
||||
} else {
|
||||
// 3+ decimal digits: divide out the extra precision to get hundredths.
|
||||
// extra = decimal_digits - 2; divisor = 10^extra.
|
||||
let extra: Int = decimal_digits - 2
|
||||
let divisor: Int = if extra == 1 { 10 } else {
|
||||
if extra == 2 { 100 } else {
|
||||
if extra == 3 { 1000 } else {
|
||||
if extra == 4 { 10000 } else { 100000 }
|
||||
}
|
||||
}
|
||||
}
|
||||
stripped / divisor
|
||||
}
|
||||
}
|
||||
}
|
||||
if raw > 100 { 100 } else { if raw < 0 { 0 } else { raw } }
|
||||
}
|
||||
@@ -397,6 +423,47 @@ fn hist_trim_with_bell_guard(hist: String) -> String {
|
||||
return hist
|
||||
}
|
||||
|
||||
// hist_trim_to_byte_cap — drop oldest user+assistant pairs until the history blob
|
||||
// is at or below `cap_bytes` in length, or until 2 or fewer entries remain (the minimum
|
||||
// safe window). Uses the same structural json_array_len/json_array_get approach as
|
||||
// hist_trim to stay immune to content containing JSON marker strings.
|
||||
//
|
||||
// Called after count-based trimming to enforce a hard size ceiling on the history
|
||||
// blob. Without this cap, long technical sessions with large assistant responses
|
||||
// (code blocks, logs, analysis) can push the 40-turn window to 100KB+, which causes
|
||||
// engram_node_full writes to grow state entries unboundedly.
|
||||
// Returns `hist` unchanged when str_len(hist) <= cap_bytes. Otherwise rebuilds the
// JSON array without entries 0 and 1, repeating until the rebuilt string is at or
// below `cap_bytes` or json_array_len reports 2 or fewer entries.
//
// The returned string can still be longer than `cap_bytes`: once 2 or fewer entries
// are left the loop stops without checking their size.
//
// Each pass re-joins every surviving entry, so dropping k pairs performs k full
// rebuilds of the array string.
//
// NOTE(review): loop termination relies on `let current_len = ...` / `let current = ...`
// inside the loop body updating the bindings read by the `while` condition and the
// final `return` — confirm against the language's rebinding semantics.
fn hist_trim_to_byte_cap(hist: String, cap_bytes: Int) -> String {
|
||||
let current: String = hist
|
||||
let current_len: Int = str_len(current)
|
||||
while current_len > cap_bytes {
|
||||
let total: Int = json_array_len(current)
|
||||
// Never trim below 2 entries (1 pair).
|
||||
if total <= 2 {
|
||||
// Setting the length to 0 makes the `while` condition false; `current` itself is
// left as-is and returned even though it is still over the cap.
let current_len = 0 // exit loop
|
||||
} else {
|
||||
// Drop entries 0 and 1 (oldest pair).
|
||||
let result: String = ""
|
||||
|
||||
let i: Int = 2
|
||||
// Join entries 2..total-1 with "," (no leading comma on the first one).
// Assumes json_array_get returns each entry as serialized JSON text so the joined
// string is a valid array body — TODO confirm.
while i < total {
|
||||
let entry: String = json_array_get(current, i)
|
||||
let result = if str_eq(result, "") {
|
||||
entry
|
||||
} else {
|
||||
result + "," + entry
|
||||
}
|
||||
let i = i + 1
|
||||
}
|
||||
// Defensive: with total > 2 at least one entry was visited, so an empty `result`
// means every json_array_get call returned "".
if str_eq(result, "") {
|
||||
let current_len = 0 // exit loop
|
||||
} else {
|
||||
let current = "[" + result + "]"
|
||||
// Re-measure so the `while` condition sees the trimmed size.
let current_len = str_len(current)
|
||||
}
|
||||
}
|
||||
}
|
||||
return current
|
||||
}
|
||||
|
||||
// clean_llm_response — strips GPT-2 BPE byte-to-unicode artifacts that vLLM
|
||||
// emits when the tokenizer hasn't decoded back to raw bytes.
|
||||
//
|
||||
@@ -651,19 +718,23 @@ fn handle_chat(body: String) -> String {
|
||||
let updated_hist2: String = hist_append(updated_hist, "assistant", raw_response)
|
||||
// Use bell-guarded trim: if the evicted turn triggered a bell event, it is
|
||||
// preserved to engram before being dropped from the in-memory window.
|
||||
// Issue #8 (NO MAX SIZE GUARD): the 20-turn count limit bounds entry count, but individual
|
||||
// messages can be arbitrarily large (up to max_tokens = 4096 tokens each). At 20 turns the
|
||||
// history blob can reach ~80KB before trim fires. engram_node_full has no apparent size cap.
|
||||
// A byte-length cap would require truncating or summarising entries — too invasive here.
|
||||
// TODO: add a byte-length cap (e.g. 32KB) that drops oldest entries until under limit.
|
||||
// Increased from 20 to 40 turns: long technical sessions lose early context at 20
|
||||
// (10 user + 10 assistant pairs). 40 turns preserves problem framing for multi-step
|
||||
// tasks while the bell guard still persists evicted distress turns to engram.
|
||||
let final_hist: String = if json_array_len(updated_hist2) > 40 {
|
||||
// Byte-cap: after count-based trim, also trim oldest pairs until the history blob
|
||||
// is under 32KB. Long technical sessions with large assistant responses (code blocks,
|
||||
// analysis) can produce 100-160KB+ state entries at 40 turns; the count limit alone
|
||||
// is insufficient. We retain at least 2 entries (1 user + 1 assistant pair) regardless.
|
||||
let count_trimmed: String = if json_array_len(updated_hist2) > 40 {
|
||||
hist_trim_with_bell_guard(updated_hist2)
|
||||
} else {
|
||||
updated_hist2
|
||||
}
|
||||
let final_hist: String = if str_len(count_trimmed) > 32768 {
|
||||
hist_trim_to_byte_cap(count_trimmed, 32768)
|
||||
} else {
|
||||
count_trimmed
|
||||
}
|
||||
state_set("conv_history", final_hist)
|
||||
conv_history_persist(final_hist)
|
||||
|
||||
@@ -1242,7 +1313,13 @@ fn handle_chat_agentic(body: String) -> String {
|
||||
let updated: String = hist_append(agentic_hist, "user", message)
|
||||
let updated2: String = hist_append(updated, "assistant", reply_text)
|
||||
// Increased from 20 to 40 turns: consistent with handle_chat window expansion.
|
||||
let trimmed: String = if json_array_len(updated2) > 40 { hist_trim(updated2) } else { updated2 }
|
||||
// Byte-cap: also trim if the blob exceeds 32KB, consistent with handle_chat.
|
||||
let count_trimmed2: String = if json_array_len(updated2) > 40 { hist_trim(updated2) } else { updated2 }
|
||||
let trimmed: String = if str_len(count_trimmed2) > 32768 {
|
||||
hist_trim_to_byte_cap(count_trimmed2, 32768)
|
||||
} else {
|
||||
count_trimmed2
|
||||
}
|
||||
state_set(hist_key, trimmed)
|
||||
// Only persist the default global session to engram — named sessions are ephemeral.
|
||||
if str_eq(hist_key, "conv_history") {
|
||||
|
||||
Reference in New Issue
Block a user