// runtime/string.el — String operations implemented in El.
//
// All functions delegate character-level work to the seed primitives declared
// in el_seed.c. No C is written here; this is pure El source that compiles
// to C via the normal El pipeline.
//
// Seed primitives used (provided by el_seed.c):
//   __str_len(s)                -> Int
//   __str_char_at(s, i)         -> Int     (char code at byte index i)
//   __str_alloc(n)              -> String  (n-byte zero-filled mutable buffer)
//   __str_set_char(s, i, c)     -> String  (mutate s[i]=c, return s)
//   __str_cmp(a, b)             -> Int     (strcmp)
//   __str_ncmp(a, b, n)         -> Int     (strncmp)
//   __str_concat_raw(a, b)      -> String
//   __str_slice_raw(s, lo, hi)  -> String  (substring copy [lo, hi))
//   __int_to_str(n)             -> String
//   __str_to_int(s)             -> Int
//   __float_to_str(f)           -> String
//   __str_to_float(s)           -> Float
//   __println(s)
//   __print(s)
//   __readline()                -> String
//   __url_encode(s)             -> String
//   __url_decode(s)             -> String
//   __sha256_hex(s)             -> String

// ── I/O ──────────────────────────────────────────────────────────────────────

// Hand `s` to the __println seed primitive.
fn println(s: String) -> Void {
    __println(s)
}

// Hand `s` to the __print seed primitive.
fn print(s: String) -> Void {
    __print(s)
}

// Return whatever the __readline seed primitive yields.
fn readline() -> String {
    return __readline()
}

// ── Type conversions ──────────────────────────────────────────────────────────

// Int -> String via the __int_to_str seed primitive.
fn int_to_str(n: Int) -> String {
    return __int_to_str(n)
}

// String -> Int via the __str_to_int seed primitive.
fn str_to_int(s: String) -> Int {
    return __str_to_int(s)
}

// Float -> String via the __float_to_str seed primitive.
fn float_to_str(f: Float) -> String {
    return __float_to_str(f)
}

// String -> Float via the __str_to_float seed primitive.
fn str_to_float(s: String) -> Float {
    return __str_to_float(s)
}

// Render a Bool as the literal text "true" or "false".
fn bool_to_str(b: Bool) -> String {
    if !b {
        return "false"
    }
    return "true"
}

// ── URL encoding ──────────────────────────────────────────────────────────────

// Encode `s` with the __url_encode seed primitive.
fn url_encode(s: String) -> String {
    return __url_encode(s)
}

// Decode `s` with the __url_decode seed primitive.
fn url_decode(s: String) -> String {
    return __url_decode(s)
}

// ── Math ──────────────────────────────────────────────────────────────────────

// Absolute value; a negative input is negated by subtracting it from zero.
fn el_abs(n: Int) -> Int {
    if n >= 0 {
        return n
    }
    return 0 - n
}

// Larger of the two arguments (returns b when they are equal).
fn el_max(a: Int, b: Int) -> Int {
    if b >= a {
        return b
    }
    return a
}

// Smaller of the two arguments (returns b when they are equal).
fn el_min(a: Int, b: Int) -> Int {
    if b <= a {
        return b
    }
    return a
}

// ── Core string primitives ────────────────────────────────────────────────────

// Length of `s` as reported by __str_len.
fn str_len(s: String) -> Int {
    return __str_len(s)
}

// True when __str_cmp reports the two strings as equal (result 0).
fn str_eq(a: String, b: String) -> Bool {
    if __str_cmp(a, b) == 0 {
        return true
    }
    return false
}

// Concatenation of `a` followed by `b`.
fn str_concat(a: String, b: String) -> String {
    return __str_concat_raw(a, b)
}

// Substring copy of the half-open byte range [start, end).
// Out-of-range arguments are clamped rather than rejected: `start` is forced
// into [0, len] and `end` into [start, len], so an inverted or fully
// out-of-range request produces an empty slice.
fn str_slice(s: String, start: Int, end: Int) -> String {
    let total: Int = __str_len(s)
    let from: Int = el_min(el_max(start, 0), total)
    let upto: Int = el_min(el_max(end, from), total)
    return __str_slice_raw(s, from, upto)
}

// ── Whitespace helpers (internal) ─────────────────────────────────────────────
//
// _is_ws: returns true for ASCII whitespace (space, tab, \n, \r, \f, \v).
fn _is_ws(c: Int) -> Bool {
    if c == 32 { return true }   // space
    // tab (9), \n (10), \v (11), \f (12) and \r (13) form one contiguous range.
    if c < 9 { return false }
    if c > 13 { return false }
    return true
}

// Scan forward from index 0; return index of first byte not in whitespace,
// or n if the entire string is whitespace.
fn _find_first_non_ws(s: String, n: Int) -> Int {
    let pos: Int = 0
    while pos < n {
        if _is_ws(__str_char_at(s, pos)) {
            pos = pos + 1
        } else {
            return pos
        }
    }
    return n
}

// Scan backward from index n-1; return index of last non-whitespace byte,
// or -1 if the entire string is whitespace.
fn _find_last_non_ws(s: String, n: Int) -> Int {
    let i: Int = n - 1
    while i >= 0 {
        if !_is_ws(__str_char_at(s, i)) { return i }
        i = i - 1
    }
    return -1
}

// ── Comparison and search ─────────────────────────────────────────────────────

// True when the first len(prefix) bytes of `s` equal `prefix`.
// An empty prefix always matches; a prefix longer than `s` never does.
fn str_starts_with(s: String, prefix: String) -> Bool {
    let plen: Int = __str_len(prefix)
    let slen: Int = __str_len(s)
    if plen > slen { return false }
    return __str_ncmp(s, prefix, plen) == 0
}

// True when the last len(suffix) bytes of `s` equal `suffix`.
// An empty suffix always matches; a suffix longer than `s` never does.
fn str_ends_with(s: String, suffix: String) -> Bool {
    let slen: Int = __str_len(s)
    let suflen: Int = __str_len(suffix)
    if suflen > slen { return false }
    let tail: String = __str_slice_raw(s, slen - suflen, slen)
    return __str_cmp(tail, suffix) == 0
}

// True when `sub` occurs anywhere in `s`. An empty `sub` is always contained.
// Delegates to str_index_of, which returns 0 for an empty `sub` and -1 when
// there is no match, so the two functions cannot drift apart.
fn str_contains(s: String, sub: String) -> Bool {
    return str_index_of(s, sub) >= 0
}

// Byte index of the first occurrence of `sub` in `s`, or -1 if absent.
// An empty `sub` matches at index 0.
fn str_index_of(s: String, sub: String) -> Int {
    let slen: Int = __str_len(s)
    let sublen: Int = __str_len(sub)
    if sublen == 0 { return 0 }
    if sublen > slen { return -1 }
    // Last start position at which a full-length window still fits.
    let limit: Int = slen - sublen
    let i: Int = 0
    while i <= limit {
        let window: String = __str_slice_raw(s, i, i + sublen)
        if __str_cmp(window, sub) == 0 { return i }
        i = i + 1
    }
    return -1
}

// Byte index of the last occurrence of `sub` in `s`, or -1 if absent.
// An empty `sub` matches at index len(s).
//
// The scan runs backward from the last position where `sub` still fits and
// stops at the first hit. A forward scan that jumps past each match would
// miss a later, overlapping occurrence (e.g. "aa" in "aaa" is last found at
// index 1, not 0).
fn str_last_index_of(s: String, sub: String) -> Int {
    let slen: Int = __str_len(s)
    let sublen: Int = __str_len(sub)
    if sublen == 0 { return slen }
    if sublen > slen { return -1 }
    let i: Int = slen - sublen
    while i >= 0 {
        let window: String = __str_slice_raw(s, i, i + sublen)
        if __str_cmp(window, sub) == 0 { return i }
        i = i - 1
    }
    return -1
}

// Byte indices of every non-overlapping occurrence of `sub` in `s`, in
// ascending order. After a match the scan resumes just past it, so
// overlapping occurrences are not reported. An empty `sub`, or one longer
// than `s`, yields an empty list.
fn str_index_of_all(s: String, sub: String) -> [Int] {
    let result: [Int] = el_list_empty()
    let slen: Int = __str_len(s)
    let sublen: Int = __str_len(sub)
    if sublen == 0 { return result }
    if sublen > slen { return result }
    let limit: Int = slen - sublen
    let i: Int = 0
    while i <= limit {
        let window: String = __str_slice_raw(s, i, i + sublen)
        if __str_cmp(window, sub) == 0 {
            result = el_list_append(result, i)
            i = i + sublen
        } else {
            i = i + 1
        }
    }
    return result
}

// Return the byte index of the first character in s that appears in any_of,
// or -1 if none found.
fn str_find_chars(s: String, any_of: String) -> Int {
    let slen: Int = __str_len(s)
    let alen: Int = __str_len(any_of)
    if alen == 0 { return -1 }
    let i: Int = 0
    while i < slen {
        let c: Int = __str_char_at(s, i)
        // Compare the current byte of s against every byte of any_of.
        let j: Int = 0
        while j < alen {
            if c == __str_char_at(any_of, j) { return i }
            j = j + 1
        }
        i = i + 1
    }
    return -1
}

// ── Character access ──────────────────────────────────────────────────────────

// Return a one-character string at byte index i, or "" if out of range.
fn str_char_at(s: String, i: Int) -> String {
    let slen: Int = __str_len(s)
    if i < 0 { return "" }
    if i >= slen { return "" }
    return __str_slice_raw(s, i, i + 1)
}

// Return the char code (byte value) at byte index i, or 0 if out of range.
fn str_char_code(s: String, i: Int) -> Int {
    let slen: Int = __str_len(s)
    if i < 0 { return 0 }
    if i >= slen { return 0 }
    return __str_char_at(s, i)
}

// ── Case conversion ───────────────────────────────────────────────────────────

// Copy of `s` with ASCII a-z mapped to A-Z; every other byte is copied as is.
fn str_to_upper(s: String) -> String {
    let n: Int = __str_len(s)
    if n == 0 { return "" }
    let out: String = __str_alloc(n)
    let i: Int = 0
    while i < n {
        let c: Int = __str_char_at(s, i)
        // a-z (97-122) -> A-Z (65-90): subtract 32
        if c >= 97 {
            if c <= 122 { c = c - 32 }
        }
        out = __str_set_char(out, i, c)
        i = i + 1
    }
    return out
}

// Copy of `s` with ASCII A-Z mapped to a-z; every other byte is copied as is.
fn str_to_lower(s: String) -> String {
    let n: Int = __str_len(s)
    if n == 0 { return "" }
    let out: String = __str_alloc(n)
    let i: Int = 0
    while i < n {
        let c: Int = __str_char_at(s, i)
        // A-Z (65-90) -> a-z (97-122): add 32
        if c >= 65 {
            if c <= 90 { c = c + 32 }
        }
        out = __str_set_char(out, i, c)
        i = i + 1
    }
    return out
}

// Aliases used in existing El codebases.
fn str_lower(s: String) -> String {
    return str_to_lower(s)
}

fn str_upper(s: String) -> String {
    return str_to_upper(s)
}

// ── Whitespace trimming ───────────────────────────────────────────────────────

// Remove leading and trailing ASCII whitespace (as defined by _is_ws).
// An empty or all-whitespace input yields "".
fn str_trim(s: String) -> String {
    let n: Int = __str_len(s)
    if n == 0 { return "" }
    let lo: Int = _find_first_non_ws(s, n)
    if lo == n { return "" }
    let hi: Int = _find_last_non_ws(s, n)
    return __str_slice_raw(s, lo, hi + 1)
}

// Remove leading ASCII whitespace only.
fn str_lstrip(s: String) -> String {
    let n: Int = __str_len(s)
    if n == 0 { return "" }
    let lo: Int = _find_first_non_ws(s, n)
    if lo == n { return "" }
    return __str_slice_raw(s, lo, n)
}

// Remove trailing ASCII whitespace only.
fn str_rstrip(s: String) -> String {
    let n: Int = __str_len(s)
    if n == 0 { return "" }
    let hi: Int = _find_last_non_ws(s, n)
    if hi < 0 { return "" }
    return __str_slice_raw(s, 0, hi + 1)
}

// ── Replacement ───────────────────────────────────────────────────────────────

// Replace every non-overlapping occurrence of `from` in `s` with `to`,
// scanning left to right. Returns `s` itself when `from` is empty, when `s`
// is empty, or when `from` is longer than `s` (no match is possible).
//
// Unmatched text is appended to the result one whole run at a time (the bytes
// between the end of the previous match and the start of the next) instead of
// one byte per concatenation, so the number of concatenations grows with the
// number of matches rather than with the length of `s`.
fn str_replace(s: String, from: String, to: String) -> String {
    let slen: Int = __str_len(s)
    let flen: Int = __str_len(from)
    if flen == 0 { return s }
    if slen == 0 { return s }
    if flen > slen { return s }

    let result: String = ""
    // Start of the pending run of bytes not yet copied into result.
    let run_start: Int = 0
    // Last index at which a full-length window for `from` still fits.
    let limit: Int = slen - flen
    let i: Int = 0
    while i <= limit {
        let window: String = __str_slice_raw(s, i, i + flen)
        if __str_cmp(window, from) == 0 {
            // Flush the unmatched run that precedes this match, if any.
            if i > run_start {
                result = __str_concat_raw(result, __str_slice_raw(s, run_start, i))
            }
            result = __str_concat_raw(result, to)
            i = i + flen
            run_start = i
        } else {
            i = i + 1
        }
    }
    // Whatever follows the final match (or all of s when nothing matched).
    result = __str_concat_raw(result, __str_slice_raw(s, run_start, slen))
    return result
}

// ── Repetition and reversal ───────────────────────────────────────────────────

// `s` concatenated with itself `n` times. n <= 0 or an empty `s` yields "".
fn str_repeat(s: String, n: Int) -> String {
    if n <= 0 { return "" }
    let slen: Int = __str_len(s)
    if slen == 0 { return "" }
    let result: String = ""
    let i: Int = 0
    while i < n {
        result = __str_concat_raw(result, s)
        i = i + 1
    }
    return result
}

// Byte-reverse (correct for ASCII; for multi-byte UTF-8 codepoints this
// reverses bytes within a codepoint, which is intentional at this tier —
// Phase 2 will add grapheme-aware reversal).
fn str_reverse(s: String) -> String {
    let total: Int = __str_len(s)
    if total == 0 {
        return ""
    }
    let reversed: String = __str_alloc(total)
    // Read the source from its last byte down to its first while filling the
    // destination buffer from index 0 upward.
    let src: Int = total - 1
    let dst: Int = 0
    while src >= 0 {
        reversed = __str_set_char(reversed, dst, __str_char_at(s, src))
        dst = dst + 1
        src = src - 1
    }
    return reversed
}

// ── Prefix/suffix stripping ───────────────────────────────────────────────────

// Drop `prefix` from the front of `s` when it is present there.
// Returns `s` unchanged when `prefix` is empty or does not match.
fn str_strip_prefix(s: String, prefix: String) -> String {
    let plen: Int = __str_len(prefix)
    if plen == 0 {
        return s
    }
    // str_starts_with already rejects a prefix longer than s.
    if !str_starts_with(s, prefix) {
        return s
    }
    return __str_slice_raw(s, plen, __str_len(s))
}

// Drop `suffix` from the end of `s` when it is present there.
// Returns `s` unchanged when `suffix` is empty or does not match.
fn str_strip_suffix(s: String, suffix: String) -> String {
    let suflen: Int = __str_len(suffix)
    if suflen == 0 {
        return s
    }
    // str_ends_with already rejects a suffix longer than s.
    if !str_ends_with(s, suffix) {
        return s
    }
    return __str_slice_raw(s, 0, __str_len(s) - suflen)
}

// Strip leading and trailing bytes whose char code appears in `chars`.
// An empty `s`, or one made up entirely of bytes from `chars`, yields "";
// an empty `chars` returns `s` unchanged.
fn str_strip_chars(s: String, chars: String) -> String {
    let slen: Int = __str_len(s)
    if slen == 0 {
        return ""
    }
    let clen: Int = __str_len(chars)
    if clen == 0 {
        return s
    }
    let first_keep: Int = _find_first_not_in_charset(s, chars, slen, clen)
    if first_keep == slen {
        return ""
    }
    let last_keep: Int = _find_last_not_in_charset(s, chars, slen, clen)
    let stop: Int = last_keep + 1
    return __str_slice_raw(s, first_keep, stop)
}

// Internal: true if char code `c` is present in the charset string.
fn _char_in_set(c: Int, chars: String, clen: Int) -> Bool {
    let k: Int = 0
    while k < clen {
        if __str_char_at(chars, k) == c {
            return true
        } else {
            k = k + 1
        }
    }
    return false
}

// Internal: index of the first byte of `s` that is NOT in `chars`,
// or slen when every byte is in the set.
fn _find_first_not_in_charset(s: String, chars: String, slen: Int, clen: Int) -> Int {
    let pos: Int = 0
    while pos < slen {
        if _char_in_set(__str_char_at(s, pos), chars, clen) {
            pos = pos + 1
        } else {
            return pos
        }
    }
    return slen
}

// Internal: index of the last byte of `s` that is NOT in `chars`,
// or -1 when every byte is in the set.
fn _find_last_not_in_charset(s: String, chars: String, slen: Int, clen: Int) -> Int {
    let pos: Int = slen - 1
    while pos >= 0 {
        if _char_in_set(__str_char_at(s, pos), chars, clen) {
            pos = pos - 1
        } else {
            return pos
        }
    }
    return -1
}

// ── Padding ───────────────────────────────────────────────────────────────────

// Pad s on the left to `width` total chars, repeating `pad` cyclically.
// `s` is returned unchanged when it is already at least `width` long or when
// `pad` is empty.
fn str_pad_left(s: String, width: Int, pad: String) -> String {
    let slen: Int = __str_len(s)
    if slen >= width {
        return s
    }
    let plen: Int = __str_len(pad)
    if plen == 0 {
        return s
    }
    let fill: String = ""
    // Walk through `pad` one byte at a time, wrapping to its start at the end.
    let pad_pos: Int = 0
    let remaining: Int = width - slen
    while remaining > 0 {
        fill = __str_concat_raw(fill, __str_slice_raw(pad, pad_pos, pad_pos + 1))
        pad_pos = pad_pos + 1
        if pad_pos == plen {
            pad_pos = 0
        }
        remaining = remaining - 1
    }
    return __str_concat_raw(fill, s)
}

// Pad s on the right to `width` total chars, repeating `pad` cyclically.
// `s` is returned unchanged when it is already at least `width` long or when
// `pad` is empty.
fn str_pad_right(s: String, width: Int, pad: String) -> String {
    let slen: Int = __str_len(s)
    if slen >= width {
        return s
    }
    let plen: Int = __str_len(pad)
    if plen == 0 {
        return s
    }
    let fill: String = ""
    // Walk through `pad` one byte at a time, wrapping to its start at the end.
    let pad_pos: Int = 0
    let remaining: Int = width - slen
    while remaining > 0 {
        fill = __str_concat_raw(fill, __str_slice_raw(pad, pad_pos, pad_pos + 1))
        pad_pos = pad_pos + 1
        if pad_pos == plen {
            pad_pos = 0
        }
        remaining = remaining - 1
    }
    return __str_concat_raw(s, fill)
}

// ── Counting ──────────────────────────────────────────────────────────────────

// Count non-overlapping occurrences of `sub` in `s`. Empty sub returns 0.
fn str_count(s: String, sub: String) -> Int {
    let sublen: Int = __str_len(sub)
    if sublen == 0 {
        return 0
    }
    let slen: Int = __str_len(s)
    if sublen > slen {
        return 0
    }
    let hits: Int = 0
    let pos: Int = 0
    // Keep going while a full-length window still fits at `pos`.
    while pos + sublen <= slen {
        if __str_cmp(__str_slice_raw(s, pos, pos + sublen), sub) == 0 {
            hits = hits + 1
            // Resume after the match so occurrences never overlap.
            pos = pos + sublen
        } else {
            pos = pos + 1
        }
    }
    return hits
}

// Byte count — alias of str_len.
fn str_count_bytes(s: String) -> Int {
    return __str_len(s)
}

// UTF-8 codepoint count: count bytes that are NOT continuation bytes (10xxxxxx).
// Continuation bytes have the pattern 10xxxxxx = 0x80..0xBF (128..191).
fn str_count_chars(s: String) -> Int {
    let total: Int = __str_len(s)
    // Start from the byte count and take one off per continuation byte; what
    // remains is the number of bytes that begin a codepoint.
    let codepoints: Int = total
    let pos: Int = 0
    while pos < total {
        let b: Int = __str_char_at(s, pos)
        if b >= 128 {
            if b <= 191 {
                codepoints = codepoints - 1
            }
        }
        pos = pos + 1
    }
    return codepoints
}

// Count newline-delimited lines. A trailing newline does NOT add an extra empty line.
fn str_count_lines(s: String) -> Int {
    let total: Int = __str_len(s)
    if total == 0 {
        return 0
    }
    let newlines: Int = 0
    let pos: Int = 0
    while pos < total {
        if __str_char_at(s, pos) == 10 {   // \n
            newlines = newlines + 1
        }
        pos = pos + 1
    }
    // Text after the last \n (or a string with no \n at all) is one more line;
    // a string that ends in \n has no such unterminated remainder.
    if __str_char_at(s, total - 1) == 10 {
        return newlines
    }
    return newlines + 1
}

// Count whitespace-delimited words (non-empty tokens).
fn str_count_words(s: String) -> Int {
    let total: Int = __str_len(s)
    let words: Int = 0
    // True at the start of the string and after any whitespace byte, i.e.
    // whenever the next non-whitespace byte would begin a new word.
    let at_boundary: Bool = true
    let pos: Int = 0
    while pos < total {
        if _is_ws(__str_char_at(s, pos)) {
            at_boundary = true
        } else {
            if at_boundary {
                words = words + 1
            }
            at_boundary = false
        }
        pos = pos + 1
    }
    return words
}

// Count ASCII letters [A-Za-z].
fn str_count_letters(s: String) -> Int {
    let total: Int = __str_len(s)
    let letters: Int = 0
    let pos: Int = 0
    while pos < total {
        let b: Int = __str_char_at(s, pos)
        if b <= 90 {
            if b >= 65 { letters = letters + 1 }       // A-Z
        } else {
            if b >= 97 {
                if b <= 122 { letters = letters + 1 }  // a-z
            }
        }
        pos = pos + 1
    }
    return letters
}

// Count ASCII decimal digits [0-9].
fn str_count_digits(s: String) -> Int {
    let total: Int = __str_len(s)
    let digits: Int = 0
    let pos: Int = 0
    while pos < total {
        let b: Int = __str_char_at(s, pos)
        if b <= 57 {
            if b >= 48 { digits = digits + 1 }   // '0'-'9'
        }
        pos = pos + 1
    }
    return digits
}

// ── Character classification ──────────────────────────────────────────────────
//
// For all predicates: empty string -> false.
// Multi-char string: ALL bytes must satisfy the predicate.

// True when every byte of `s` is an ASCII letter [A-Za-z].
fn is_letter(s: String) -> Bool {
    let total: Int = __str_len(s)
    if total == 0 {
        return false
    }
    let pos: Int = 0
    while pos < total {
        let b: Int = __str_char_at(s, pos)
        if b < 65 { return false }        // below 'A'
        if b > 122 { return false }       // above 'z'
        if b > 90 {
            if b < 97 { return false }    // gap between 'Z' and 'a'
        }
        pos = pos + 1
    }
    return true
}

// True when every byte of `s` is an ASCII digit [0-9].
fn is_digit(s: String) -> Bool {
    let total: Int = __str_len(s)
    if total == 0 {
        return false
    }
    let pos: Int = 0
    while pos < total {
        let b: Int = __str_char_at(s, pos)
        let digit: Bool = false
        if b >= 48 {
            if b <= 57 { digit = true }   // '0'-'9'
        }
        if !digit {
            return false
        }
        pos = pos + 1
    }
    return true
}

// True when every byte of `s` is an ASCII letter or digit.
fn is_alphanumeric(s: String) -> Bool {
    let total: Int = __str_len(s)
    if total == 0 {
        return false
    }
    let pos: Int = 0
    while pos < total {
        let b: Int = __str_char_at(s, pos)
        if b < 48 { return false }        // below '0'
        if b > 122 { return false }       // above 'z'
        if b > 57 {
            if b < 65 { return false }    // gap between '9' and 'A'
        }
        if b > 90 {
            if b < 97 { return false }    // gap between 'Z' and 'a'
        }
        pos = pos + 1
    }
    return true
}

// True when every byte of `s` is ASCII whitespace (as defined by _is_ws).
fn is_whitespace(s: String) -> Bool {
    let total: Int = __str_len(s)
    if total == 0 {
        return false
    }
    // _find_first_non_ws returns `total` only when no non-whitespace byte exists.
    return _find_first_non_ws(s, total) == total
}

// ASCII punctuation: 33-47, 58-64, 91-96, 123-126.
fn is_punctuation(s: String) -> Bool {
    let total: Int = __str_len(s)
    if total == 0 {
        return false
    }
    let pos: Int = 0
    while pos < total {
        let b: Int = __str_char_at(s, pos)
        // Punctuation is the range 33..126 with the digits and both letter
        // ranges carved out.
        if b < 33 { return false }
        if b > 126 { return false }
        if b >= 48 {
            if b <= 57 { return false }    // 0-9
        }
        if b >= 65 {
            if b <= 90 { return false }    // A-Z
        }
        if b >= 97 {
            if b <= 122 { return false }   // a-z
        }
        pos = pos + 1
    }
    return true
}

// True when every byte of `s` is an ASCII uppercase letter [A-Z].
fn is_uppercase(s: String) -> Bool {
    let total: Int = __str_len(s)
    if total == 0 {
        return false
    }
    let pos: Int = 0
    while pos < total {
        let b: Int = __str_char_at(s, pos)
        let upper: Bool = false
        if b >= 65 {
            if b <= 90 { upper = true }    // 'A'-'Z'
        }
        if !upper {
            return false
        }
        pos = pos + 1
    }
    return true
}

// True when every byte of `s` is an ASCII lowercase letter [a-z].
fn is_lowercase(s: String) -> Bool {
    let total: Int = __str_len(s)
    if total == 0 {
        return false
    }
    let pos: Int = 0
    while pos < total {
        let b: Int = __str_char_at(s, pos)
        let lower: Bool = false
        if b >= 97 {
            if b <= 122 { lower = true }   // 'a'-'z'
        }
        if !lower {
            return false
        }
        pos = pos + 1
    }
    return true
}

// ── Splitting ─────────────────────────────────────────────────────────────────

// Split `s` on every occurrence of `sep`, scanning left to right.
// The result always has at least one element: text between separators, plus a
// final piece that is "" when `s` ends with `sep` (or when `s` is empty).
fn str_split(s: String, sep: String) -> [String] {
    let pieces: [String] = el_list_empty()
    let seplen: Int = __str_len(sep)
    // Empty separator: return the whole string as a single element.
    if seplen == 0 {
        pieces = el_list_append(pieces, s)
        return pieces
    }
    let slen: Int = __str_len(s)
    let piece_start: Int = 0
    let cursor: Int = 0
    // Once fewer than seplen bytes remain no further match is possible.
    while cursor + seplen <= slen {
        if __str_cmp(__str_slice_raw(s, cursor, cursor + seplen), sep) == 0 {
            pieces = el_list_append(pieces, __str_slice_raw(s, piece_start, cursor))
            cursor = cursor + seplen
            piece_start = cursor
        } else {
            cursor = cursor + 1
        }
    }
    // Append remaining tail (may be empty string if s ended with sep).
    pieces = el_list_append(pieces, __str_slice_raw(s, piece_start, slen))
    return pieces
}

// Split into at most n parts. The nth part (index n-1) contains the remainder
// verbatim, including any further separators. n <= 0 returns []. n == 1
// returns [s].
fn str_split_n(s: String, sep: String, n: Int) -> [String] {
    let pieces: [String] = el_list_empty()
    if n <= 0 {
        return pieces
    }
    let seplen: Int = __str_len(sep)
    // Both a one-part limit and an empty separator mean "do not split".
    let whole: Bool = false
    if n == 1 { whole = true }
    if seplen == 0 { whole = true }
    if whole {
        pieces = el_list_append(pieces, s)
        return pieces
    }
    let slen: Int = __str_len(s)
    // n parts need at most n-1 cuts.
    let cuts_left: Int = n - 1
    let piece_start: Int = 0
    let cursor: Int = 0
    // Once fewer than seplen bytes remain no further match is possible.
    while cursor + seplen <= slen {
        if cuts_left == 0 {
            // Reached the split limit — jump to the end so the loop exits and
            // the rest is emitted below.
            cursor = slen
        } else {
            if __str_cmp(__str_slice_raw(s, cursor, cursor + seplen), sep) == 0 {
                pieces = el_list_append(pieces, __str_slice_raw(s, piece_start, cursor))
                cursor = cursor + seplen
                piece_start = cursor
                cuts_left = cuts_left - 1
            } else {
                cursor = cursor + 1
            }
        }
    }
    // Remainder verbatim.
    pieces = el_list_append(pieces, __str_slice_raw(s, piece_start, slen))
    return pieces
}

// Split on newlines. \r\n is folded to \n. Trailing empty line after a
// final \n is dropped — so "a\nb\n" yields ["a", "b"], not ["a", "b", ""].
fn str_split_lines(s: String) -> [String] {
    let lines: [String] = el_list_empty()
    let total: Int = __str_len(s)
    if total == 0 {
        return lines
    }
    let start: Int = 0
    let pos: Int = 0
    while pos < total {
        if __str_char_at(s, pos) == 10 {   // \n
            let stop: Int = pos
            // Fold \r\n: when the line is non-empty and its last byte is \r,
            // end the line one byte earlier.
            let prev: Int = pos - 1
            if prev >= start {
                if __str_char_at(s, prev) == 13 {
                    stop = prev
                }
            }
            lines = el_list_append(lines, __str_slice_raw(s, start, stop))
            start = pos + 1
        }
        pos = pos + 1
    }
    // Trailing content with no terminating \n.
    if start < total {
        lines = el_list_append(lines, __str_slice_raw(s, start, total))
    }
    return lines
}

// Split into a list of one-byte strings (byte-level chars).
fn str_split_chars(s: String) -> [String] {
    let pieces: [String] = el_list_empty()
    let total: Int = __str_len(s)
    let pos: Int = 0
    while pos < total {
        let next: Int = pos + 1
        pieces = el_list_append(pieces, __str_slice_raw(s, pos, next))
        pos = next
    }
    return pieces
}

// ── Joining ───────────────────────────────────────────────────────────────────

// Join a list of strings with a separator between consecutive elements.
// Empty list yields "". Non-string elements should not be passed here.
fn str_join(parts: [String], sep: String) -> String {
    let count: Int = el_list_len(parts)
    if count == 0 {
        return ""
    }
    let joined: String = el_list_get(parts, 0)
    let idx: Int = 1
    while idx < count {
        // Build "sep + element" first, then append that pair to the result.
        let chunk: String = __str_concat_raw(sep, el_list_get(parts, idx))
        joined = __str_concat_raw(joined, chunk)
        idx = idx + 1
    }
    return joined
}

// ── DHARMA byte encoding (str_to_bytes) ──────────────────────────────────────
//
// str_to_bytes — encode a string as a JSON array of unsigned byte values.
//   "hi" -> "[104,105]"
// Used by db.el to store content in Engram JSON nodes as a byte array.
// Note: bytes_to_str (the inverse) is defined in json.el because it depends
// on json_array_get_string which is defined there.
fn str_to_bytes(s: String) -> String {
    let total: Int = __str_len(s)
    if total == 0 {
        return "[]"
    }
    // The first value is written without a leading comma; every later value
    // is preceded by one.
    let encoded: String = __str_concat_raw("[", __int_to_str(__str_char_at(s, 0)))
    let pos: Int = 1
    while pos < total {
        encoded = __str_concat_raw(encoded, ",")
        encoded = __str_concat_raw(encoded, __int_to_str(__str_char_at(s, pos)))
        pos = pos + 1
    }
    return __str_concat_raw(encoded, "]")
}

// ── Cryptographic hashing ─────────────────────────────────────────────────────

// hash_sha256 — return the SHA-256 hex digest of a string.
// Delegates to the __sha256_hex seed primitive.
fn hash_sha256(s: String) -> String {
    return __sha256_hex(s)
}