908 lines
28 KiB
EmacsLisp
908 lines
28 KiB
EmacsLisp
// runtime/string.el — String operations implemented in El.
|
|
//
|
|
// All functions delegate character-level work to the seed primitives declared
|
|
// in el_seed.c. No C is written here; this is pure El source that compiles
|
|
// to C via the normal El pipeline.
|
|
//
|
|
// Seed primitives used (provided by el_seed.c):
|
|
// __str_len(s) -> Int
|
|
// __str_char_at(s, i) -> Int (char code at byte index i)
|
|
// __str_alloc(n) -> String (n-byte zero-filled mutable buffer)
|
|
// __str_set_char(s, i, c) -> String (mutate s[i]=c, return s)
|
|
// __str_cmp(a, b) -> Int (strcmp)
|
|
// __str_ncmp(a, b, n) -> Int (strncmp)
|
|
// __str_concat_raw(a, b) -> String
|
|
// __str_slice_raw(s, lo, hi) -> String (substring copy [lo, hi))
|
|
// __int_to_str(n) -> String
|
|
// __str_to_int(s) -> Int
|
|
// __float_to_str(f) -> String
|
|
// __str_to_float(s) -> Float
|
|
// __println(s)
|
|
// __print(s)
|
|
// __readline() -> String
|
|
// __url_encode(s) -> String
|
|
// __url_decode(s) -> String
// __sha256_hex(s) -> String (hex digest; used by hash_sha256)
|
|
|
|
// ── I/O ──────────────────────────────────────────────────────────────────────
|
|
|
|
// Output `s` by handing it to the __println seed primitive.
// Returns nothing; all behaviour lives in the primitive.
fn println(s: String) -> Void {
    __println(s)
}
|
|
|
|
// Output `s` by handing it to the __print seed primitive.
// Returns nothing; all behaviour lives in the primitive.
fn print(s: String) -> Void {
    __print(s)
}
|
|
|
|
// Return the string produced by the __readline seed primitive.
fn readline() -> String {
    let line: String = __readline()
    return line
}
|
|
|
|
// ── Type conversions ──────────────────────────────────────────────────────────
|
|
|
|
// Convert the integer `n` to its string form via __int_to_str.
fn int_to_str(n: Int) -> String {
    let text: String = __int_to_str(n)
    return text
}
|
|
|
|
// Convert `s` to an Int via __str_to_int.
// Parsing rules (and the result for malformed input) are those of the primitive.
fn str_to_int(s: String) -> Int {
    let value: Int = __str_to_int(s)
    return value
}
|
|
|
|
// Convert the float `f` to its string form via __float_to_str.
// The exact formatting is decided by the primitive.
fn float_to_str(f: Float) -> String {
    let text: String = __float_to_str(f)
    return text
}
|
|
|
|
// Convert `s` to a Float via __str_to_float.
// Parsing rules (and the result for malformed input) are those of the primitive.
fn str_to_float(s: String) -> Float {
    let value: Float = __str_to_float(s)
    return value
}
|
|
|
|
// Render a Bool as the literal text "true" or "false".
fn bool_to_str(b: Bool) -> String {
    let text: String = "false"
    if b { text = "true" }
    return text
}
|
|
|
|
// ── URL encoding ──────────────────────────────────────────────────────────────
|
|
|
|
// URL-encode `s` by delegating to the __url_encode seed primitive.
// Which bytes get escaped is decided by the primitive.
fn url_encode(s: String) -> String {
    let encoded: String = __url_encode(s)
    return encoded
}
|
|
|
|
// URL-decode `s` by delegating to the __url_decode seed primitive.
fn url_decode(s: String) -> String {
    let decoded: String = __url_decode(s)
    return decoded
}
|
|
|
|
// ── Math ──────────────────────────────────────────────────────────────────────
|
|
|
|
// Absolute value of `n`. Negative inputs are negated as `0 - n`.
fn el_abs(n: Int) -> Int {
    if n >= 0 { return n }
    return 0 - n
}
|
|
|
|
// Larger of `a` and `b`; when they are equal, `b` is returned.
fn el_max(a: Int, b: Int) -> Int {
    if b >= a { return b }
    return a
}
|
|
|
|
// Smaller of `a` and `b`; when they are equal, `b` is returned.
fn el_min(a: Int, b: Int) -> Int {
    if b <= a { return b }
    return a
}
|
|
|
|
// ── Core string primitives ────────────────────────────────────────────────────
|
|
|
|
// Length of `s` in bytes, as reported by the __str_len seed primitive.
fn str_len(s: String) -> Int {
    let total: Int = __str_len(s)
    return total
}
|
|
|
|
// True when `a` and `b` compare equal, i.e. __str_cmp (strcmp) reports 0.
fn str_eq(a: String, b: String) -> Bool {
    let order: Int = __str_cmp(a, b)
    return order == 0
}
|
|
|
|
// Return `a` followed by `b`, built by the __str_concat_raw seed primitive.
fn str_concat(a: String, b: String) -> String {
    let joined: String = __str_concat_raw(a, b)
    return joined
}
|
|
|
|
// Return the bytes of `s` in the half-open range [start, end).
//
// Both bounds are clamped before the raw slice primitive is called, so it
// always receives 0 <= lo <= hi <= len(s). Out-of-range or inverted bounds
// therefore produce a shorter (possibly empty) string instead of an error.
fn str_slice(s: String, start: Int, end: Int) -> String {
    let total: Int = __str_len(s)
    // Lower bound: pull negatives up to 0, then cap at the string length.
    let from: Int = el_min(el_max(start, 0), total)
    // Upper bound: never below the lower bound, never past the end.
    let upto: Int = el_min(el_max(end, from), total)
    return __str_slice_raw(s, from, upto)
}
|
|
|
|
// ── Whitespace helpers (internal) ─────────────────────────────────────────────
|
|
//
|
|
// _is_ws: true when `c` is the code of an ASCII whitespace byte:
// tab (9), \n (10), \v (11), \f (12), \r (13) or space (32).
fn _is_ws(c: Int) -> Bool {
    // Codes 9..13 form one contiguous run of whitespace control characters.
    if c >= 9 {
        if c <= 13 { return true }
    }
    return c == 32
}
|
|
|
|
// Walk the first `n` bytes of `s` from the front and return the index of the
// first byte that is not whitespace (per _is_ws). Returns `n` when every
// byte examined is whitespace.
fn _find_first_non_ws(s: String, n: Int) -> Int {
    let pos: Int = 0
    while pos < n {
        if _is_ws(__str_char_at(s, pos)) {
            pos = pos + 1
        } else {
            return pos
        }
    }
    return n
}
|
|
|
|
// Walk the first `n` bytes of `s` from the back (index n-1 downward) and
// return the index of the last byte that is not whitespace (per _is_ws).
// Returns -1 when every byte examined is whitespace.
fn _find_last_non_ws(s: String, n: Int) -> Int {
    let pos: Int = n - 1
    while pos >= 0 {
        if _is_ws(__str_char_at(s, pos)) {
            pos = pos - 1
        } else {
            return pos
        }
    }
    return -1
}
|
|
|
|
// ── Comparison and search ─────────────────────────────────────────────────────
|
|
|
|
// True when the first len(prefix) bytes of `s` equal `prefix`
// (compared with __str_ncmp). A prefix longer than `s` never matches.
fn str_starts_with(s: String, prefix: String) -> Bool {
    let have: Int = __str_len(s)
    let want: Int = __str_len(prefix)
    if want <= have {
        return __str_ncmp(s, prefix, want) == 0
    }
    return false
}
|
|
|
|
// True when the last len(suffix) bytes of `s` equal `suffix`.
// A suffix longer than `s` never matches.
fn str_ends_with(s: String, suffix: String) -> Bool {
    let total: Int = __str_len(s)
    let need: Int = __str_len(suffix)
    if need > total { return false }
    // Index where the candidate tail begins.
    let cut: Int = total - need
    return __str_cmp(__str_slice_raw(s, cut, total), suffix) == 0
}
|
|
|
|
// True when `sub` occurs somewhere in `s`. An empty `sub` is always found.
//
// Implemented on top of str_index_of, which returns 0 for an empty `sub`
// and -1 when there is no occurrence.
fn str_contains(s: String, sub: String) -> Bool {
    return str_index_of(s, sub) >= 0
}
|
|
|
|
// Byte index of the first occurrence of `sub` in `s`, or -1 if absent.
// An empty `sub` is reported at index 0.
fn str_index_of(s: String, sub: String) -> Int {
    let hay: Int = __str_len(s)
    let needle: Int = __str_len(sub)
    if needle == 0 { return 0 }
    if needle > hay { return -1 }
    // Highest start index at which `sub` still fits inside `s`.
    let last_start: Int = hay - needle
    let pos: Int = 0
    while pos <= last_start {
        if __str_cmp(__str_slice_raw(s, pos, pos + needle), sub) == 0 {
            return pos
        }
        pos = pos + 1
    }
    return -1
}
|
|
|
|
// Byte index of the LAST occurrence of `sub` in `s`, or -1 if absent.
// An empty `sub` is reported at len(s).
//
// The scan runs from the highest possible start index downward and stops at
// the first hit. Every start position is examined, so occurrences that
// overlap an earlier one are found too: "aaa"/"aa" -> 1, "ababa"/"aba" -> 2.
// (A forward scan that jumps `sublen` bytes past each hit would skip those
// positions and report an earlier index.)
fn str_last_index_of(s: String, sub: String) -> Int {
    let slen: Int = __str_len(s)
    let sublen: Int = __str_len(sub)
    if sublen == 0 { return slen }
    if sublen > slen { return -1 }
    // Highest start index at which `sub` still fits inside `s`.
    let i: Int = slen - sublen
    while i >= 0 {
        let window: String = __str_slice_raw(s, i, i + sublen)
        if __str_cmp(window, sub) == 0 { return i }
        i = i - 1
    }
    return -1
}
|
|
|
|
// Byte indices of the non-overlapping occurrences of `sub` in `s`, in
// ascending order. After a hit the scan resumes just past the matched bytes.
// Returns an empty list when `sub` is empty or longer than `s`.
fn str_index_of_all(s: String, sub: String) -> [Int] {
    let hits: [Int] = el_list_empty()
    let hay: Int = __str_len(s)
    let needle: Int = __str_len(sub)
    if needle == 0 { return hits }
    if needle > hay { return hits }
    // Highest start index at which `sub` still fits inside `s`.
    let last_start: Int = hay - needle
    let pos: Int = 0
    while pos <= last_start {
        // Advance one byte on a miss, a whole match length on a hit.
        let step: Int = 1
        if __str_cmp(__str_slice_raw(s, pos, pos + needle), sub) == 0 {
            hits = el_list_append(hits, pos)
            step = needle
        }
        pos = pos + step
    }
    return hits
}
|
|
|
|
// Byte index of the first byte of `s` whose code also appears in `any_of`,
// or -1 when there is none (including when `any_of` is empty).
fn str_find_chars(s: String, any_of: String) -> Int {
    let set_len: Int = __str_len(any_of)
    if set_len == 0 { return -1 }
    let total: Int = __str_len(s)
    let pos: Int = 0
    while pos < total {
        // Membership test is shared with the strip helpers.
        if _char_in_set(__str_char_at(s, pos), any_of, set_len) { return pos }
        pos = pos + 1
    }
    return -1
}
|
|
|
|
// ── Character access ──────────────────────────────────────────────────────────
|
|
|
|
// One-byte string holding the byte of `s` at index `i`.
// Any index outside [0, len(s)) yields "".
fn str_char_at(s: String, i: Int) -> String {
    if i >= 0 {
        if i < __str_len(s) {
            return __str_slice_raw(s, i, i + 1)
        }
    }
    return ""
}
|
|
|
|
// Char code (byte value) of `s` at byte index `i`.
// Any index outside [0, len(s)) yields 0.
fn str_char_code(s: String, i: Int) -> Int {
    if i >= 0 {
        if i < __str_len(s) {
            return __str_char_at(s, i)
        }
    }
    return 0
}
|
|
|
|
// ── Case conversion ───────────────────────────────────────────────────────────
|
|
|
|
// Return a copy of `s` with ASCII lowercase letters converted to uppercase.
// Works byte by byte; bytes outside 'a'..'z' are copied unchanged.
fn str_to_upper(s: String) -> String {
    let total: Int = __str_len(s)
    if total == 0 { return "" }
    let buf: String = __str_alloc(total)
    let pos: Int = 0
    while pos < total {
        let code: Int = __str_char_at(s, pos)
        // 'a'..'z' is 97..122; the uppercase twin sits 32 codes lower.
        if code <= 122 {
            if code >= 97 { code = code - 32 }
        }
        buf = __str_set_char(buf, pos, code)
        pos = pos + 1
    }
    return buf
}
|
|
|
|
// Return a copy of `s` with ASCII uppercase letters converted to lowercase.
// Works byte by byte; bytes outside 'A'..'Z' are copied unchanged.
fn str_to_lower(s: String) -> String {
    let total: Int = __str_len(s)
    if total == 0 { return "" }
    let buf: String = __str_alloc(total)
    let pos: Int = 0
    while pos < total {
        let code: Int = __str_char_at(s, pos)
        // 'A'..'Z' is 65..90; the lowercase twin sits 32 codes higher.
        if code <= 90 {
            if code >= 65 { code = code + 32 }
        }
        buf = __str_set_char(buf, pos, code)
        pos = pos + 1
    }
    return buf
}
|
|
|
|
// Aliases used in existing El codebases.

// str_lower: alternate name for str_to_lower.
fn str_lower(s: String) -> String {
    let lowered: String = str_to_lower(s)
    return lowered
}
|
|
|
|
// str_upper: alternate name for str_to_upper.
fn str_upper(s: String) -> String {
    let raised: String = str_to_upper(s)
    return raised
}
|
|
|
|
// ── Whitespace trimming ───────────────────────────────────────────────────────
|
|
|
|
// Return `s` without leading and trailing ASCII whitespace (per _is_ws).
// An empty or all-whitespace input yields "".
fn str_trim(s: String) -> String {
    let total: Int = __str_len(s)
    if total == 0 { return "" }
    let last: Int = _find_last_non_ws(s, total)
    // -1 means no non-whitespace byte exists at all.
    if last < 0 { return "" }
    let first: Int = _find_first_non_ws(s, total)
    return __str_slice_raw(s, first, last + 1)
}
|
|
|
|
// Return `s` without leading ASCII whitespace (per _is_ws).
// An empty or all-whitespace input yields "".
fn str_lstrip(s: String) -> String {
    let total: Int = __str_len(s)
    if total == 0 { return "" }
    let first: Int = _find_first_non_ws(s, total)
    if first < total {
        return __str_slice_raw(s, first, total)
    }
    // The scan ran off the end: nothing but whitespace.
    return ""
}
|
|
|
|
// Return `s` without trailing ASCII whitespace (per _is_ws).
// An empty or all-whitespace input yields "".
fn str_rstrip(s: String) -> String {
    let total: Int = __str_len(s)
    if total == 0 { return "" }
    let last: Int = _find_last_non_ws(s, total)
    if last >= 0 {
        return __str_slice_raw(s, 0, last + 1)
    }
    // The scan ran off the front: nothing but whitespace.
    return ""
}
|
|
|
|
// ── Replacement ───────────────────────────────────────────────────────────────
|
|
|
|
// Replace every non-overlapping occurrence of `from` in `s` with `to`,
// scanning left to right. An empty `from` or an empty `s` returns `s` as-is.
//
// Bytes between matches are copied as whole runs (one slice and one concat
// per run) instead of one byte at a time, so the number of intermediate
// strings grows with the number of matches rather than with len(s).
fn str_replace(s: String, from: String, to: String) -> String {
    let slen: Int = __str_len(s)
    let flen: Int = __str_len(from)
    if flen == 0 { return s }
    if slen == 0 { return s }
    let result: String = ""
    // First byte of `s` that has not been copied into `result` yet.
    let run_start: Int = 0
    // Highest index at which `from` still fits; negative if `from` is longer
    // than `s`, in which case the loop body never runs.
    let limit: Int = slen - flen
    let i: Int = 0
    while i <= limit {
        let window: String = __str_slice_raw(s, i, i + flen)
        if __str_cmp(window, from) == 0 {
            // Flush the unmatched run that precedes this match, if any.
            if i > run_start {
                result = __str_concat_raw(result, __str_slice_raw(s, run_start, i))
            }
            result = __str_concat_raw(result, to)
            i = i + flen
            run_start = i
        } else {
            i = i + 1
        }
    }
    // Everything after the last match (or all of `s` when nothing matched).
    if run_start < slen {
        result = __str_concat_raw(result, __str_slice_raw(s, run_start, slen))
    }
    return result
}
|
|
|
|
// ── Repetition and reversal ───────────────────────────────────────────────────
|
|
|
|
// Return `s` concatenated with itself `n` times.
// Yields "" when n <= 0 or when `s` is empty.
fn str_repeat(s: String, n: Int) -> String {
    if n <= 0 { return "" }
    if __str_len(s) == 0 { return "" }
    let acc: String = ""
    // Count the remaining copies down to zero.
    let left: Int = n
    while left > 0 {
        acc = __str_concat_raw(acc, s)
        left = left - 1
    }
    return acc
}
|
|
|
|
// Return the bytes of `s` in reverse order.
//
// This is a byte-level reversal: correct for ASCII, but the bytes inside a
// multi-byte UTF-8 codepoint are reversed as well. That is intentional at
// this tier — Phase 2 will add grapheme-aware reversal.
fn str_reverse(s: String) -> String {
    let total: Int = __str_len(s)
    if total == 0 { return "" }
    let buf: String = __str_alloc(total)
    // Read from the back of `s` while writing from the front of `buf`.
    let src: Int = total - 1
    let dst: Int = 0
    while src >= 0 {
        buf = __str_set_char(buf, dst, __str_char_at(s, src))
        src = src - 1
        dst = dst + 1
    }
    return buf
}
|
|
|
|
// ── Prefix/suffix stripping ───────────────────────────────────────────────────
|
|
|
|
// Return `s` with one leading `prefix` removed. When `prefix` is empty or
// `s` does not start with it, `s` itself is returned.
fn str_strip_prefix(s: String, prefix: String) -> String {
    let cut: Int = __str_len(prefix)
    if cut == 0 { return s }
    if str_starts_with(s, prefix) {
        return __str_slice_raw(s, cut, __str_len(s))
    }
    return s
}
|
|
|
|
// Return `s` with one trailing `suffix` removed. When `suffix` is empty or
// `s` does not end with it, `s` itself is returned.
fn str_strip_suffix(s: String, suffix: String) -> String {
    let drop: Int = __str_len(suffix)
    if drop == 0 { return s }
    if str_ends_with(s, suffix) {
        return __str_slice_raw(s, 0, __str_len(s) - drop)
    }
    return s
}
|
|
|
|
// Remove from both ends of `s` every byte whose char code appears in `chars`.
// Empty `s` yields ""; empty `chars` returns `s` itself; a string made up
// entirely of bytes from `chars` yields "".
fn str_strip_chars(s: String, chars: String) -> String {
    let total: Int = __str_len(s)
    if total == 0 { return "" }
    let set_len: Int = __str_len(chars)
    if set_len == 0 { return s }
    let last: Int = _find_last_not_in_charset(s, chars, total, set_len)
    // -1 means every byte of `s` belongs to the set.
    if last < 0 { return "" }
    let first: Int = _find_first_not_in_charset(s, chars, total, set_len)
    return __str_slice_raw(s, first, last + 1)
}
|
|
|
|
// Internal: true when char code `c` equals any of the first `clen` bytes of
// the charset string `chars`.
fn _char_in_set(c: Int, chars: String, clen: Int) -> Bool {
    // Scan order does not affect the answer, so walk from the back.
    let k: Int = clen - 1
    while k >= 0 {
        if __str_char_at(chars, k) == c { return true }
        k = k - 1
    }
    return false
}
|
|
|
|
// Internal: index of the first of the leading `slen` bytes of `s` whose code
// is NOT in `chars` (first `clen` bytes). Returns `slen` when all are in it.
fn _find_first_not_in_charset(s: String, chars: String, slen: Int, clen: Int) -> Int {
    let pos: Int = 0
    while pos < slen {
        if _char_in_set(__str_char_at(s, pos), chars, clen) {
            pos = pos + 1
        } else {
            return pos
        }
    }
    return slen
}
|
|
|
|
// Internal: index of the last of the leading `slen` bytes of `s` whose code
// is NOT in `chars` (first `clen` bytes). Returns -1 when all are in it.
fn _find_last_not_in_charset(s: String, chars: String, slen: Int, clen: Int) -> Int {
    let pos: Int = slen - 1
    while pos >= 0 {
        if _char_in_set(__str_char_at(s, pos), chars, clen) {
            pos = pos - 1
        } else {
            return pos
        }
    }
    return -1
}
|
|
|
|
// ── Padding ───────────────────────────────────────────────────────────────────
|
|
|
|
// Left-pad `s` to `width` bytes, cycling through the bytes of `pad` in order.
// `s` is returned untouched when it is already at least `width` long or when
// `pad` is empty.
fn str_pad_left(s: String, width: Int, pad: String) -> String {
    let have: Int = __str_len(s)
    if have >= width { return s }
    let pad_len: Int = __str_len(pad)
    if pad_len == 0 { return s }
    let fill: String = ""
    let remaining: Int = width - have
    // Position inside `pad`; wraps back to 0 after the last pad byte.
    let cursor: Int = 0
    while remaining > 0 {
        fill = __str_concat_raw(fill, __str_slice_raw(pad, cursor, cursor + 1))
        cursor = cursor + 1
        if cursor == pad_len { cursor = 0 }
        remaining = remaining - 1
    }
    return __str_concat_raw(fill, s)
}
|
|
|
|
// Right-pad `s` to `width` bytes, cycling through the bytes of `pad` in order.
// `s` is returned untouched when it is already at least `width` long or when
// `pad` is empty.
fn str_pad_right(s: String, width: Int, pad: String) -> String {
    let have: Int = __str_len(s)
    if have >= width { return s }
    let pad_len: Int = __str_len(pad)
    if pad_len == 0 { return s }
    let fill: String = ""
    let remaining: Int = width - have
    // Position inside `pad`; wraps back to 0 after the last pad byte.
    let cursor: Int = 0
    while remaining > 0 {
        fill = __str_concat_raw(fill, __str_slice_raw(pad, cursor, cursor + 1))
        cursor = cursor + 1
        if cursor == pad_len { cursor = 0 }
        remaining = remaining - 1
    }
    return __str_concat_raw(s, fill)
}
|
|
|
|
// ── Counting ──────────────────────────────────────────────────────────────────
|
|
|
|
// Number of non-overlapping occurrences of `sub` in `s`, scanning left to
// right. Returns 0 when `sub` is empty or longer than `s`.
fn str_count(s: String, sub: String) -> Int {
    let hay: Int = __str_len(s)
    let needle: Int = __str_len(sub)
    if needle == 0 { return 0 }
    if needle > hay { return 0 }
    let hits: Int = 0
    // Highest start index at which `sub` still fits inside `s`.
    let last_start: Int = hay - needle
    let pos: Int = 0
    while pos <= last_start {
        // Advance one byte on a miss, a whole match length on a hit.
        let step: Int = 1
        if __str_cmp(__str_slice_raw(s, pos, pos + needle), sub) == 0 {
            hits = hits + 1
            step = needle
        }
        pos = pos + step
    }
    return hits
}
|
|
|
|
// Number of bytes in `s`. Same result as str_len, which it calls.
fn str_count_bytes(s: String) -> Int {
    return str_len(s)
}
|
|
|
|
// UTF-8 codepoint count: every byte that is NOT a continuation byte starts a
// codepoint. Continuation bytes have the bit pattern 10xxxxxx, i.e. codes
// 0x80..0xBF (128..191).
// NOTE(review): assumes __str_char_at reports bytes as 0..255 — TODO confirm;
// a negative code would be counted as a codepoint start.
fn str_count_chars(s: String) -> Int {
    let total: Int = __str_len(s)
    let starts: Int = 0
    let pos: Int = 0
    while pos < total {
        let code: Int = __str_char_at(s, pos)
        let is_continuation: Bool = false
        if code >= 128 {
            if code <= 191 { is_continuation = true }
        }
        if !is_continuation { starts = starts + 1 }
        pos = pos + 1
    }
    return starts
}
|
|
|
|
// Number of \n-delimited lines in `s`. A trailing newline does NOT add an
// extra empty line, so "a\nb\n" and "a\nb" both count as 2. Empty input is 0.
fn str_count_lines(s: String) -> Int {
    let total: Int = __str_len(s)
    if total == 0 { return 0 }
    let newlines: Int = 0
    let pos: Int = 0
    while pos < total {
        if __str_char_at(s, pos) == 10 { newlines = newlines + 1 }
        pos = pos + 1
    }
    // If the final byte is \n, every line was terminated; otherwise there is
    // one more, unterminated, line at the end.
    if __str_char_at(s, total - 1) == 10 { return newlines }
    return newlines + 1
}
|
|
|
|
// Number of maximal runs of non-whitespace bytes in `s` (whitespace per
// _is_ws). A word is counted at its first byte.
fn str_count_words(s: String) -> Int {
    let total: Int = __str_len(s)
    let words: Int = 0
    // Treat the position before the string as whitespace so that a word
    // starting at index 0 is counted.
    let prev_ws: Bool = true
    let pos: Int = 0
    while pos < total {
        let ws: Bool = _is_ws(__str_char_at(s, pos))
        if !ws {
            if prev_ws { words = words + 1 }
        }
        prev_ws = ws
        pos = pos + 1
    }
    return words
}
|
|
|
|
// Number of bytes in `s` that are ASCII letters [A-Za-z].
fn str_count_letters(s: String) -> Int {
    let total: Int = __str_len(s)
    let letters: Int = 0
    let pos: Int = 0
    while pos < total {
        let code: Int = __str_char_at(s, pos)
        let is_alpha: Bool = false
        if code >= 65 { if code <= 90 { is_alpha = true } }   // A-Z
        if code >= 97 { if code <= 122 { is_alpha = true } }  // a-z
        if is_alpha { letters = letters + 1 }
        pos = pos + 1
    }
    return letters
}
|
|
|
|
// Number of bytes in `s` that are ASCII decimal digits [0-9].
fn str_count_digits(s: String) -> Int {
    let total: Int = __str_len(s)
    let digits: Int = 0
    let pos: Int = 0
    while pos < total {
        let code: Int = __str_char_at(s, pos)
        // '0'..'9' is 48..57.
        if code <= 57 {
            if code >= 48 { digits = digits + 1 }
        }
        pos = pos + 1
    }
    return digits
}
|
|
|
|
// ── Character classification ──────────────────────────────────────────────────
|
|
//
|
|
// For all predicates: empty string -> false.
|
|
// Multi-char string: ALL bytes must satisfy the predicate.
|
|
|
|
// True when `s` is non-empty and every byte is an ASCII letter [A-Za-z].
fn is_letter(s: String) -> Bool {
    let total: Int = __str_len(s)
    if total == 0 { return false }
    let pos: Int = 0
    while pos < total {
        let code: Int = __str_char_at(s, pos)
        if code < 65 { return false }    // below 'A'
        if code > 122 { return false }   // above 'z'
        // 91..96 is the gap between 'Z' and 'a'.
        if code > 90 {
            if code < 97 { return false }
        }
        pos = pos + 1
    }
    return true
}
|
|
|
|
// True when `s` is non-empty and every byte is an ASCII digit [0-9].
fn is_digit(s: String) -> Bool {
    let total: Int = __str_len(s)
    if total == 0 { return false }
    let pos: Int = 0
    while pos < total {
        let code: Int = __str_char_at(s, pos)
        let ok: Bool = false
        if code >= 48 { if code <= 57 { ok = true } }  // '0'-'9'
        if !ok { return false }
        pos = pos + 1
    }
    return true
}
|
|
|
|
// True when `s` is non-empty and every byte is an ASCII letter or digit
// [0-9A-Za-z].
fn is_alphanumeric(s: String) -> Bool {
    let total: Int = __str_len(s)
    if total == 0 { return false }
    let pos: Int = 0
    while pos < total {
        let code: Int = __str_char_at(s, pos)
        if code < 48 { return false }    // below '0'
        if code > 122 { return false }   // above 'z'
        // 58..64 is the gap between '9' and 'A'.
        if code > 57 {
            if code < 65 { return false }
        }
        // 91..96 is the gap between 'Z' and 'a'.
        if code > 90 {
            if code < 97 { return false }
        }
        pos = pos + 1
    }
    return true
}
|
|
|
|
// True when `s` is non-empty and every byte is ASCII whitespace (per _is_ws).
fn is_whitespace(s: String) -> Bool {
    let total: Int = __str_len(s)
    if total == 0 { return false }
    // The forward scan returns `total` exactly when it never meets a
    // non-whitespace byte.
    return _find_first_non_ws(s, total) == total
}
|
|
|
|
// True when `s` is non-empty and every byte is ASCII punctuation:
// codes 33-47, 58-64, 91-96 and 123-126. Equivalently, a code in 33..126
// that is not a digit, an uppercase letter or a lowercase letter.
fn is_punctuation(s: String) -> Bool {
    let total: Int = __str_len(s)
    if total == 0 { return false }
    let pos: Int = 0
    while pos < total {
        let code: Int = __str_char_at(s, pos)
        if code < 33 { return false }
        if code > 126 { return false }
        if code >= 48 { if code <= 57 { return false } }    // 0-9
        if code >= 65 { if code <= 90 { return false } }    // A-Z
        if code >= 97 { if code <= 122 { return false } }   // a-z
        pos = pos + 1
    }
    return true
}
|
|
|
|
// True when `s` is non-empty and every byte is an ASCII uppercase letter
// [A-Z]. Any other byte (digits and punctuation included) makes it false.
fn is_uppercase(s: String) -> Bool {
    let total: Int = __str_len(s)
    if total == 0 { return false }
    let pos: Int = 0
    while pos < total {
        let code: Int = __str_char_at(s, pos)
        let ok: Bool = false
        if code >= 65 { if code <= 90 { ok = true } }  // 'A'-'Z'
        if !ok { return false }
        pos = pos + 1
    }
    return true
}
|
|
|
|
// True when `s` is non-empty and every byte is an ASCII lowercase letter
// [a-z]. Any other byte (digits and punctuation included) makes it false.
fn is_lowercase(s: String) -> Bool {
    let total: Int = __str_len(s)
    if total == 0 { return false }
    let pos: Int = 0
    while pos < total {
        let code: Int = __str_char_at(s, pos)
        let ok: Bool = false
        if code >= 97 { if code <= 122 { ok = true } }  // 'a'-'z'
        if !ok { return false }
        pos = pos + 1
    }
    return true
}
|
|
|
|
// ── Splitting ─────────────────────────────────────────────────────────────────
|
|
|
|
// Split `s` on every non-overlapping occurrence of `sep`, left to right.
// The result always has at least one element: adjacent, leading or trailing
// separators produce empty strings. An empty `sep` yields [s].
fn str_split(s: String, sep: String) -> [String] {
    let pieces: [String] = el_list_empty()
    let total: Int = __str_len(s)
    let sep_len: Int = __str_len(sep)
    // Empty separator: the whole string is the single element.
    if sep_len == 0 {
        pieces = el_list_append(pieces, s)
        return pieces
    }
    let piece_start: Int = 0
    // Past this index the separator no longer fits, so no match is possible.
    let last_start: Int = total - sep_len
    let pos: Int = 0
    while pos <= last_start {
        if __str_cmp(__str_slice_raw(s, pos, pos + sep_len), sep) == 0 {
            pieces = el_list_append(pieces, __str_slice_raw(s, piece_start, pos))
            pos = pos + sep_len
            piece_start = pos
        } else {
            pos = pos + 1
        }
    }
    // Whatever follows the last separator (empty when `s` ends with `sep`).
    pieces = el_list_append(pieces, __str_slice_raw(s, piece_start, total))
    return pieces
}
|
|
|
|
// Split `s` on `sep` into at most `n` parts. The last part holds the
// remainder verbatim, including any further separators.
//   n <= 0          -> []
//   n == 1          -> [s]
//   empty separator -> [s]
fn str_split_n(s: String, sep: String, n: Int) -> [String] {
    let pieces: [String] = el_list_empty()
    if n <= 0 { return pieces }
    if n == 1 {
        pieces = el_list_append(pieces, s)
        return pieces
    }
    let total: Int = __str_len(s)
    let sep_len: Int = __str_len(sep)
    if sep_len == 0 {
        pieces = el_list_append(pieces, s)
        return pieces
    }
    // n parts means at most n-1 cuts.
    let max_cuts: Int = n - 1
    let cuts: Int = 0
    let piece_start: Int = 0
    // Past this index the separator no longer fits, so no match is possible.
    let last_start: Int = total - sep_len
    let pos: Int = 0
    while pos <= last_start {
        if cuts >= max_cuts {
            // Cut budget spent: leave the loop, the rest is emitted below.
            pos = total
        } else {
            if __str_cmp(__str_slice_raw(s, pos, pos + sep_len), sep) == 0 {
                pieces = el_list_append(pieces, __str_slice_raw(s, piece_start, pos))
                pos = pos + sep_len
                piece_start = pos
                cuts = cuts + 1
            } else {
                pos = pos + 1
            }
        }
    }
    // Remainder verbatim.
    pieces = el_list_append(pieces, __str_slice_raw(s, piece_start, total))
    return pieces
}
|
|
|
|
// Split `s` into lines at each \n. A \r directly before the \n is dropped,
// so \r\n behaves like \n. No empty element is added after a final \n:
// "a\nb\n" yields ["a", "b"], not ["a", "b", ""]. Empty input yields [].
fn str_split_lines(s: String) -> [String] {
    let lines: [String] = el_list_empty()
    let total: Int = __str_len(s)
    if total == 0 { return lines }
    let begin: Int = 0
    let pos: Int = 0
    while pos < total {
        if __str_char_at(s, pos) == 10 {  // \n
            let stop: Int = pos
            // Only look back when the line has at least one byte, so the
            // check never reads a byte belonging to the previous line.
            if stop > begin {
                if __str_char_at(s, stop - 1) == 13 { stop = stop - 1 }
            }
            lines = el_list_append(lines, __str_slice_raw(s, begin, stop))
            begin = pos + 1
        }
        pos = pos + 1
    }
    // Nothing left after the last \n: done.
    if begin >= total { return lines }
    // Trailing content with no terminating \n.
    lines = el_list_append(lines, __str_slice_raw(s, begin, total))
    return lines
}
|
|
|
|
// Break `s` into a list of one-byte strings, in order (byte-level, not
// codepoint-level). Empty input yields an empty list.
fn str_split_chars(s: String) -> [String] {
    let pieces: [String] = el_list_empty()
    let total: Int = __str_len(s)
    let pos: Int = 0
    let next: Int = 1
    while pos < total {
        pieces = el_list_append(pieces, __str_slice_raw(s, pos, next))
        pos = next
        next = next + 1
    }
    return pieces
}
|
|
|
|
// ── Joining ───────────────────────────────────────────────────────────────────
|
|
|
|
// Concatenate the elements of `parts`, inserting `sep` between consecutive
// elements. An empty list yields ""; a single-element list yields that
// element. Non-string elements should not be passed here.
fn str_join(parts: [String], sep: String) -> String {
    let total: Int = el_list_len(parts)
    if total == 0 { return "" }
    let joined: String = el_list_get(parts, 0)
    let idx: Int = 1
    while idx < total {
        let piece: String = el_list_get(parts, idx)
        joined = __str_concat_raw(__str_concat_raw(joined, sep), piece)
        idx = idx + 1
    }
    return joined
}
|
|
|
|
// ── DHARMA byte encoding (str_to_bytes) ──────────────────────────────────────
|
|
//
|
|
// str_to_bytes — encode a string as a JSON array of its byte values.
//   "hi" -> "[104,105]"
//   ""   -> "[]"
// Used by db.el to store content in Engram JSON nodes as a byte array.
// Note: bytes_to_str (the inverse) is defined in json.el because it depends
// on json_array_get_string which is defined there.
fn str_to_bytes(s: String) -> String {
    let total: Int = __str_len(s)
    if total == 0 { return "[]" }
    // The first element is written without a leading comma.
    let json: String = __str_concat_raw("[", __int_to_str(__str_char_at(s, 0)))
    let pos: Int = 1
    while pos < total {
        json = __str_concat_raw(json, ",")
        json = __str_concat_raw(json, __int_to_str(__str_char_at(s, pos)))
        pos = pos + 1
    }
    return __str_concat_raw(json, "]")
}
|
|
|
|
// ── Cryptographic hashing ─────────────────────────────────────────────────────
|
|
|
|
// hash_sha256 — SHA-256 hex digest of `s`.
// All hashing is done by the __sha256_hex seed primitive; this is a wrapper.
fn hash_sha256(s: String) -> String {
    let digest: String = __sha256_hex(s)
    return digest
}
|