merge round-4-delta: flat stride-2 token list + str_char_code dispatch + batch c_escape
- Flat token list: lexer emits [kind0, val0, kind1, val1, ...] instead of [{kind,val}, ...]
Eliminates per-token ElMap allocation (~112B × N tokens)
- str_char_code hot loop: char classification via Int codes, no strdup per char
- Batch c_escape: str_slice clean runs instead of char-at per byte
- Parser updated to use tok_at/tok_kind/tok_value stride-2 accessors
This commit is contained in:
@@ -38,10 +38,13 @@ fn is_hex_digit_byte(b: Int) -> Bool {
|
||||
}
|
||||
|
||||
fn c_escape(s: String) -> String {
|
||||
// Use index-based byte scanning via str_char_code(s, i) and str_char_at(s, i).
|
||||
// This avoids native_string_chars + str_join, which corrupts high-byte (>= 0x80)
|
||||
// characters because list_join's looks_like_string heuristic rejects strings
|
||||
// whose first byte is >= 0x7F and emits them as decimal pointer values instead.
|
||||
// Batch ASCII chars using str_slice instead of str_char_at per byte.
|
||||
// Track clean_start: the beginning of the current run of bytes that need
|
||||
// no escaping. On each special byte, flush the accumulated clean run via
|
||||
// str_slice, then append the escape. This reduces parts-list appends from
|
||||
// O(N) to O(K) where K = number of special bytes << N for normal strings.
|
||||
//
|
||||
// Special bytes: '"'=34, '\\'=92, '\n'=10, '\r'=13, '\t'=9, any byte>=128.
|
||||
//
|
||||
// IMPORTANT: after a \xNN hex escape, if the next byte is a hex digit
|
||||
// (0-9, a-f, A-F), we emit `""` to split the C string literal so the C
|
||||
@@ -51,46 +54,75 @@ fn c_escape(s: String) -> String {
|
||||
let total: Int = str_len(s)
|
||||
let parts: [String] = native_list_empty()
|
||||
let i: Int = 0
|
||||
let clean_start: Int = 0
|
||||
let prev_was_hex_escape: Bool = false
|
||||
while i < total {
|
||||
let bval: Int = str_char_code(s, i)
|
||||
// If the previous token was a \xNN escape and the current byte is a
|
||||
// hex digit, insert an empty string literal ("") to break the escape.
|
||||
// Handle the hex-escape split case first: if prev was \xNN and this
|
||||
// byte is a hex digit, we must flush the clean run and insert "".
|
||||
// (At this point clean_start == i since the previous special byte
|
||||
// already reset it, so flush is a no-op unless something is pending.)
|
||||
if prev_was_hex_escape {
|
||||
if is_hex_digit_byte(bval) {
|
||||
// Flush any accumulated clean bytes before the split marker.
|
||||
if clean_start < i {
|
||||
let parts = native_list_append(parts, str_slice(s, clean_start, i))
|
||||
}
|
||||
let parts = native_list_append(parts, "\"\"")
|
||||
let clean_start = i
|
||||
}
|
||||
}
|
||||
let prev_was_hex_escape = false
|
||||
if bval == 34 {
|
||||
// 34 = '"'
|
||||
// 34 = '"' — flush clean run, then escape
|
||||
if clean_start < i {
|
||||
let parts = native_list_append(parts, str_slice(s, clean_start, i))
|
||||
}
|
||||
let parts = native_list_append(parts, "\\\"")
|
||||
let clean_start = i + 1
|
||||
} else {
|
||||
if bval == 92 {
|
||||
// 92 = '\\'
|
||||
if clean_start < i {
|
||||
let parts = native_list_append(parts, str_slice(s, clean_start, i))
|
||||
}
|
||||
let parts = native_list_append(parts, "\\\\")
|
||||
let clean_start = i + 1
|
||||
} else {
|
||||
if bval == 10 {
|
||||
// 10 = '\n'
|
||||
if clean_start < i {
|
||||
let parts = native_list_append(parts, str_slice(s, clean_start, i))
|
||||
}
|
||||
let parts = native_list_append(parts, "\\n")
|
||||
let clean_start = i + 1
|
||||
} else {
|
||||
if bval == 13 {
|
||||
// 13 = '\r'
|
||||
if clean_start < i {
|
||||
let parts = native_list_append(parts, str_slice(s, clean_start, i))
|
||||
}
|
||||
let parts = native_list_append(parts, "\\r")
|
||||
let clean_start = i + 1
|
||||
} else {
|
||||
if bval == 9 {
|
||||
// 9 = '\t'
|
||||
if clean_start < i {
|
||||
let parts = native_list_append(parts, str_slice(s, clean_start, i))
|
||||
}
|
||||
let parts = native_list_append(parts, "\\t")
|
||||
let clean_start = i + 1
|
||||
} else {
|
||||
if bval >= 128 {
|
||||
// Escape non-ASCII bytes (>= 0x80) as \xNN so
|
||||
// Clang does not misinterpret multi-byte UTF-8
|
||||
// sequences in C string literals.
|
||||
// Non-ASCII: flush, then \xNN
|
||||
if clean_start < i {
|
||||
let parts = native_list_append(parts, str_slice(s, clean_start, i))
|
||||
}
|
||||
let parts = native_list_append(parts, "\\x" + byte_to_hex2(bval))
|
||||
let prev_was_hex_escape = true
|
||||
} else {
|
||||
let parts = native_list_append(parts, str_char_at(s, i))
|
||||
let clean_start = i + 1
|
||||
}
|
||||
// else: plain ASCII — extends the current clean run (no append)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -98,6 +130,10 @@ fn c_escape(s: String) -> String {
|
||||
}
|
||||
let i = i + 1
|
||||
}
|
||||
// Flush the final clean run if any
|
||||
if clean_start < total {
|
||||
let parts = native_list_append(parts, str_slice(s, clean_start, total))
|
||||
}
|
||||
str_join(parts, "")
|
||||
}
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ import "codegen-js.el"
|
||||
|
||||
// compile — full pipeline (C target): source string -> C source string
|
||||
fn compile(source: String) -> String {
|
||||
let tokens: [Map<String, Any>] = lex(source)
|
||||
let tokens: [Any] = lex(source)
|
||||
let stmts: [Map<String, Any>] = parse(tokens)
|
||||
// Token list is no longer needed after parsing — release it to free memory
|
||||
// before codegen allocates its own working data on large source files.
|
||||
@@ -31,7 +31,7 @@ fn compile(source: String) -> String {
|
||||
|
||||
// compile_js — full pipeline (JS target, module mode): source string -> JS source string
|
||||
fn compile_js(source: String) -> String {
|
||||
let tokens: [Map<String, Any>] = lex(source)
|
||||
let tokens: [Any] = lex(source)
|
||||
let stmts: [Map<String, Any>] = parse(tokens)
|
||||
// Token list is no longer needed after parsing — release it to free memory.
|
||||
el_release(tokens)
|
||||
@@ -41,7 +41,7 @@ fn compile_js(source: String) -> String {
|
||||
// compile_js_with_bundle — JS target in bundle mode.
|
||||
// Reads el_runtime.js from runtime_path and inlines it inside an IIFE.
|
||||
fn compile_js_with_bundle(source: String, runtime_path: String) -> String {
|
||||
let tokens: [Map<String, Any>] = lex(source)
|
||||
let tokens: [Any] = lex(source)
|
||||
let stmts: [Map<String, Any>] = parse(tokens)
|
||||
el_release(tokens)
|
||||
let runtime_content: String = fs_read(runtime_path)
|
||||
@@ -501,7 +501,7 @@ fn main() -> Void {
|
||||
// (without inlining imports) and write out a .elh file alongside the .c.
|
||||
if do_emit_header {
|
||||
let raw_source: String = fs_read(src_path)
|
||||
let hdr_tokens: [Map<String, Any>] = lex(raw_source)
|
||||
let hdr_tokens: [Any] = lex(raw_source)
|
||||
let hdr_stmts: [Map<String, Any>] = parse(hdr_tokens)
|
||||
el_release(hdr_tokens)
|
||||
let hdr_path: String = str_slice(src_path, 0, str_len(src_path) - 3) + ".elh"
|
||||
|
||||
+317
-231
@@ -7,11 +7,50 @@
|
||||
//
|
||||
// Entry point: fn lex(source: String) -> [Any]  (flat stride-2 list: kind0, val0, kind1, val1, ...)
|
||||
//
|
||||
// Uses native_string_chars to split the source into a chars list,
|
||||
// then indexes it with native_list_get - avoids O(N²) string cloning.
|
||||
// Performance: the hot lexer loop uses str_char_code (returns Int) instead of
|
||||
// str_char_at (returns strdup'd String) for character classification.
|
||||
// For a 400KB source, str_char_at allocates ~400K × 16B = ~6.4MB of temporary
|
||||
// strings for the `ch` variable alone. str_char_code avoids all that.
|
||||
|
||||
// -- Character helpers ---------------------------------------------------------
|
||||
// -- Character helpers (Int-based, no string allocation) ----------------------
|
||||
// These operate on char codes (from str_char_code) instead of str_char_at,
|
||||
// eliminating one strdup per character in the hot lexer loop.
|
||||
|
||||
// is_digit_code — reports whether the char code `c` is an ASCII decimal digit.
//   c: an Int character code (e.g. the value returned by str_char_code).
//   Returns true when 48 <= c <= 57, false otherwise.
fn is_digit_code(c: Int) -> Bool {
|
||||
// '0'=48 .. '9'=57
|
||||
// Lower bound first; the upper bound is only tested when c >= 48.
if c >= 48 {
|
||||
if c <= 57 { return true }
|
||||
}
|
||||
// Fall-through: c lies outside the 48..57 range.
false
|
||||
}
|
||||
|
||||
// is_alpha_code — reports whether the char code `c` is an ASCII letter.
//   c: an Int character code (e.g. the value returned by str_char_code).
//   Returns true for 65..90 (upper case) or 97..122 (lower case), false otherwise.
fn is_alpha_code(c: Int) -> Bool {
|
||||
// 'A'=65..'Z'=90, 'a'=97..'z'=122
|
||||
// Upper-case range.
if c >= 65 {
|
||||
if c <= 90 { return true }
|
||||
}
|
||||
// Lower-case range. Codes 91..96 fall between the two ranges and match neither.
if c >= 97 {
|
||||
if c <= 122 { return true }
|
||||
}
|
||||
// Fall-through: c is not in either letter range.
false
|
||||
}
|
||||
|
||||
// is_alnum_or_underscore_code — reports whether the char code `c` is an ASCII
// digit, an ASCII letter, or the underscore (95).
//   c: an Int character code (e.g. the value returned by str_char_code).
//   Returns true if any of the three checks below matches, false otherwise.
fn is_alnum_or_underscore_code(c: Int) -> Bool {
|
||||
// Delegate the range checks to the digit and letter predicates.
if is_digit_code(c) { return true }
|
||||
if is_alpha_code(c) { return true }
|
||||
if c == 95 { return true } // '_'
|
||||
// Fall-through: none of the accepted classes matched.
false
|
||||
}
|
||||
|
||||
// is_ws_code — reports whether the char code `c` is one of the four whitespace
// codes tested below: space (32), tab (9), newline (10), carriage return (13).
//   c: an Int character code (e.g. the value returned by str_char_code).
//   Returns true for exactly those four codes, false for everything else.
fn is_ws_code(c: Int) -> Bool {
|
||||
if c == 32 { return true } // ' '
|
||||
if c == 9 { return true } // '\t'
|
||||
if c == 10 { return true } // '\n'
|
||||
if c == 13 { return true } // '\r'
|
||||
// Fall-through: not one of the four codes above.
false
|
||||
}
|
||||
|
||||
// Legacy String-based helpers kept for scan_interp helpers that use str_char_at.
|
||||
fn lex_is_digit(ch: String) -> Bool {
|
||||
if ch == "0" { return true }
|
||||
if ch == "1" { return true }
|
||||
@@ -97,8 +136,11 @@ fn lex_is_whitespace(ch: String) -> Bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn make_tok(kind: String, value: String) -> Map<String, Any> {
|
||||
{ "kind": kind, "value": value }
|
||||
// tok_append — append a (kind, value) pair to a flat token list.
|
||||
// Returns the updated list. Gamma combines flat-list + char-code for max savings.
|
||||
fn tok_append(tokens: [Any], kind: String, value: String) -> [Any] {
|
||||
let tokens = native_list_append(tokens, kind)
|
||||
native_list_append(tokens, value)
|
||||
}
|
||||
|
||||
// -- Keyword lookup ------------------------------------------------------------
|
||||
@@ -157,45 +199,43 @@ fn keyword_kind(word: String) -> String {
|
||||
|
||||
// scan_digits - advance i while the character at src[i] is a digit
|
||||
// Returns { "text": ..., "pos": i }
|
||||
fn scan_digits(chars: [String], start: Int, total: Int) -> Map<String, Any> {
|
||||
fn scan_digits(src: String, start: Int, total: Int) -> Map<String, Any> {
|
||||
let i = start
|
||||
let parts: [String] = native_list_empty()
|
||||
let running = true
|
||||
while running {
|
||||
if i >= total {
|
||||
let running = false
|
||||
} else {
|
||||
let ch: String = native_list_get(chars, i)
|
||||
if lex_is_digit(ch) {
|
||||
let parts = native_list_append(parts, ch)
|
||||
let c: Int = str_char_code(src, i)
|
||||
if is_digit_code(c) {
|
||||
let i = i + 1
|
||||
} else {
|
||||
let running = false
|
||||
}
|
||||
}
|
||||
}
|
||||
{ "text": str_join(parts, ""), "pos": i }
|
||||
// Use str_slice instead of building a parts list — O(1) allocation, O(n) copy.
|
||||
{ "text": str_slice(src, start, i), "pos": i }
|
||||
}
|
||||
|
||||
// scan_ident - advance i while the character at src[i] is alphanumeric or underscore
|
||||
fn scan_ident(chars: [String], start: Int, total: Int) -> Map<String, Any> {
|
||||
fn scan_ident(src: String, start: Int, total: Int) -> Map<String, Any> {
|
||||
let i = start
|
||||
let parts: [String] = native_list_empty()
|
||||
let running = true
|
||||
while running {
|
||||
if i >= total {
|
||||
let running = false
|
||||
} else {
|
||||
let ch: String = native_list_get(chars, i)
|
||||
if is_alnum_or_underscore(ch) {
|
||||
let parts = native_list_append(parts, ch)
|
||||
let c: Int = str_char_code(src, i)
|
||||
if is_alnum_or_underscore_code(c) {
|
||||
let i = i + 1
|
||||
} else {
|
||||
let running = false
|
||||
}
|
||||
}
|
||||
}
|
||||
{ "text": str_join(parts, ""), "pos": i }
|
||||
// Use str_slice instead of building a parts list — O(1) allocation, O(n) copy.
|
||||
{ "text": str_slice(src, start, i), "pos": i }
|
||||
}
|
||||
|
||||
// -- Code-bearing string detection + comment strip ----------------------------
|
||||
@@ -208,34 +248,16 @@ fn scan_ident(chars: [String], start: Int, total: Int) -> Map<String, Any> {
|
||||
// looks_like_code - heuristic gate so we only strip strings that actually
|
||||
// embed JS or CSS. Plain prose, hex blobs, JSON, etc. pass through verbatim.
|
||||
|
||||
fn substr_at(chars: [String], start: Int, total: Int, needle: String) -> Bool {
|
||||
let nchars: [String] = native_string_chars(needle)
|
||||
let nlen: Int = native_list_len(nchars)
|
||||
fn substr_at(src: String, start: Int, total: Int, needle: String) -> Bool {
|
||||
let nlen: Int = str_len(needle)
|
||||
if start + nlen > total { return false }
|
||||
let i = 0
|
||||
let matched = true
|
||||
while i < nlen {
|
||||
let a: String = native_list_get(chars, start + i)
|
||||
let b: String = native_list_get(nchars, i)
|
||||
if a == b { let i = i + 1 } else { let matched = false; let i = nlen }
|
||||
}
|
||||
matched
|
||||
// Use str_slice comparison instead of char-by-char loop.
|
||||
str_eq(str_slice(src, start, start + nlen), needle)
|
||||
}
|
||||
|
||||
fn str_has(s: String, needle: String) -> Bool {
|
||||
let chars: [String] = native_string_chars(s)
|
||||
let total: Int = native_list_len(chars)
|
||||
let i = 0
|
||||
let found = false
|
||||
while i < total {
|
||||
if substr_at(chars, i, total, needle) {
|
||||
let found = true
|
||||
let i = total
|
||||
} else {
|
||||
let i = i + 1
|
||||
}
|
||||
}
|
||||
found
|
||||
// Use the built-in str_contains which is implemented in native C — O(n) single pass.
|
||||
str_contains(s, needle)
|
||||
}
|
||||
|
||||
fn looks_like_code(s: String) -> Bool {
|
||||
@@ -254,8 +276,7 @@ fn looks_like_code(s: String) -> Bool {
|
||||
// comment opener: if the char immediately before '/' is ':', emit the '/'
|
||||
// literally and advance one position.
|
||||
fn strip_code_comments(s: String) -> String {
|
||||
let chars: [String] = native_string_chars(s)
|
||||
let total: Int = native_list_len(chars)
|
||||
let total: Int = str_len(s)
|
||||
let out_parts: [String] = native_list_empty()
|
||||
let i = 0
|
||||
let in_squote = false
|
||||
@@ -263,7 +284,7 @@ fn strip_code_comments(s: String) -> String {
|
||||
let in_btick = false
|
||||
let prev = ""
|
||||
while i < total {
|
||||
let ch: String = native_list_get(chars, i)
|
||||
let ch: String = str_char_at(s, i)
|
||||
let in_js_string = false
|
||||
if in_squote { let in_js_string = true }
|
||||
if in_dquote { let in_js_string = true }
|
||||
@@ -275,7 +296,7 @@ fn strip_code_comments(s: String) -> String {
|
||||
let out_parts = native_list_append(out_parts, ch)
|
||||
let next_i = i + 1
|
||||
if next_i < total {
|
||||
let nc: String = native_list_get(chars, next_i)
|
||||
let nc: String = str_char_at(s, next_i)
|
||||
let out_parts = native_list_append(out_parts, nc)
|
||||
let prev = nc
|
||||
let i = next_i + 1
|
||||
@@ -304,7 +325,7 @@ fn strip_code_comments(s: String) -> String {
|
||||
let next_i = i + 1
|
||||
let next_ch = ""
|
||||
if next_i < total {
|
||||
let next_ch: String = native_list_get(chars, next_i)
|
||||
let next_ch: String = str_char_at(s, next_i)
|
||||
}
|
||||
|
||||
if ch == "/" {
|
||||
@@ -323,7 +344,7 @@ fn strip_code_comments(s: String) -> String {
|
||||
if i >= total {
|
||||
let scanning = false
|
||||
} else {
|
||||
let lc: String = native_list_get(chars, i)
|
||||
let lc: String = str_char_at(s, i)
|
||||
if lc == "\n" {
|
||||
let scanning = false
|
||||
} else {
|
||||
@@ -342,11 +363,11 @@ fn strip_code_comments(s: String) -> String {
|
||||
if i >= total {
|
||||
let scanning2 = false
|
||||
} else {
|
||||
let bc: String = native_list_get(chars, i)
|
||||
let bc: String = str_char_at(s, i)
|
||||
if bc == "*" {
|
||||
let after = i + 1
|
||||
if after < total {
|
||||
let nc2: String = native_list_get(chars, after)
|
||||
let nc2: String = str_char_at(s, after)
|
||||
if nc2 == "/" {
|
||||
let i = after + 1
|
||||
let scanning2 = false
|
||||
@@ -402,7 +423,7 @@ fn strip_code_comments(s: String) -> String {
|
||||
|
||||
// scan_string - scan a quoted string literal, handling \" escapes.
|
||||
// Starts AFTER the opening quote. Returns { "text": content, "pos": i_after_close }
|
||||
fn scan_string(chars: [String], start: Int, total: Int) -> Map<String, Any> {
|
||||
fn scan_string(src: String, start: Int, total: Int) -> Map<String, Any> {
|
||||
let i = start
|
||||
let parts: [String] = native_list_empty()
|
||||
let running = true
|
||||
@@ -410,12 +431,12 @@ fn scan_string(chars: [String], start: Int, total: Int) -> Map<String, Any> {
|
||||
if i >= total {
|
||||
let running = false
|
||||
} else {
|
||||
let ch: String = native_list_get(chars, i)
|
||||
let ch: String = str_char_at(src, i)
|
||||
if ch == "\\" {
|
||||
// escape: peek next char
|
||||
let next_i = i + 1
|
||||
if next_i < total {
|
||||
let next_ch: String = native_list_get(chars, next_i)
|
||||
let next_ch: String = str_char_at(src, next_i)
|
||||
if next_ch == "\"" {
|
||||
let parts = native_list_append(parts, "\"")
|
||||
let i = next_i + 1
|
||||
@@ -465,19 +486,17 @@ fn scan_string(chars: [String], start: Int, total: Int) -> Map<String, Any> {
|
||||
// scan_interp_brace - scan from `start` (the char after `${`) to the matching
|
||||
// `}`, tracking brace depth so inner braces (e.g. fn calls, map literals) are
|
||||
// handled correctly. Returns { "text": inner_source, "pos": i_after_close }.
|
||||
fn scan_interp_brace(chars: [String], start: Int, total: Int) -> Map<String, Any> {
|
||||
fn scan_interp_brace(src: String, start: Int, total: Int) -> Map<String, Any> {
|
||||
let i = start
|
||||
let parts: [String] = native_list_empty()
|
||||
let depth = 1
|
||||
let running = true
|
||||
while running {
|
||||
if i >= total {
|
||||
let running = false
|
||||
} else {
|
||||
let ch: String = native_list_get(chars, i)
|
||||
let ch: String = str_char_at(src, i)
|
||||
if ch == "{" {
|
||||
let depth = depth + 1
|
||||
let parts = native_list_append(parts, ch)
|
||||
let i = i + 1
|
||||
} else {
|
||||
if ch == "}" {
|
||||
@@ -487,33 +506,33 @@ fn scan_interp_brace(chars: [String], start: Int, total: Int) -> Map<String, Any
|
||||
let i = i + 1
|
||||
let running = false
|
||||
} else {
|
||||
let parts = native_list_append(parts, ch)
|
||||
let i = i + 1
|
||||
}
|
||||
} else {
|
||||
let parts = native_list_append(parts, ch)
|
||||
let i = i + 1
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
{ "text": str_join(parts, ""), "pos": i }
|
||||
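// Use str_slice instead of parts list — the inner source is a contiguous substring.
// The end index i - 1 excludes the closing '}' that was just consumed (i points one past it).
// NOTE(review): if the loop instead stopped because i >= total (unterminated `${`),
// no '}' was consumed and i - 1 appears to drop the last source character — confirm.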
|
||||
{ "text": str_slice(src, start, i - 1), "pos": i }
|
||||
}
|
||||
|
||||
// interp_tokens_append_all - copy every token from src into dst, skipping the
|
||||
// trailing Eof sentinel that lex() always appends. Returns the updated dst list.
|
||||
fn interp_tokens_append_all(dst: [Map<String, Any>], src: [Map<String, Any>]) -> [Map<String, Any>] {
|
||||
// interp_tokens_append_all - copy every (kind, value) pair from flat src list
|
||||
// into flat dst list, skipping the trailing Eof pair that lex() always appends.
|
||||
fn interp_tokens_append_all(dst: [Any], src: [Any]) -> [Any] {
|
||||
let src_len: Int = native_list_len(src)
|
||||
let j = 0
|
||||
let result = dst
|
||||
while j < src_len {
|
||||
let tok: Map<String, Any> = native_list_get(src, j)
|
||||
let tk: String = tok["kind"]
|
||||
if tk == "Eof" {
|
||||
let kind: String = native_list_get(src, j)
|
||||
if kind == "Eof" {
|
||||
let j = src_len
|
||||
} else {
|
||||
let result = native_list_append(result, tok)
|
||||
let j = j + 1
|
||||
let val: String = native_list_get(src, j + 1)
|
||||
let result = native_list_append(result, kind)
|
||||
let result = native_list_append(result, val)
|
||||
let j = j + 2
|
||||
}
|
||||
}
|
||||
result
|
||||
@@ -536,10 +555,17 @@ fn interp_tokens_append_all(dst: [Map<String, Any>], src: [Map<String, Any>]) ->
|
||||
//
|
||||
// Supported escape sequences: \" \n \t \r \\ \$ (literal dollar sign).
|
||||
// Nested quotes inside ${} are not supported; use a variable instead.
|
||||
fn scan_interp_string(chars: [String], start: Int, total: Int) -> Map<String, Any> {
|
||||
//
|
||||
// Performance: uses str_char_code (Int) for all character dispatch, eliminating
|
||||
// per-character strdup. Plain runs are batched into str_slice segments instead
|
||||
// of accumulating single-char strings, reducing list appends from O(N) to O(K)
|
||||
// where K = number of escape/special chars in the literal.
|
||||
// Char codes: '\' = 92, '"' = 34, '$' = 36, '{' = 123
|
||||
fn scan_interp_string(src: String, start: Int, total: Int) -> Map<String, Any> {
|
||||
let i = start
|
||||
let out_tokens: [Map<String, Any>] = native_list_empty()
|
||||
let cur_part: [String] = native_list_empty()
|
||||
let out_tokens: [Any] = native_list_empty()
|
||||
let cur_parts: [String] = native_list_empty()
|
||||
let clean_start = start
|
||||
let has_interp = false
|
||||
let need_plus = false
|
||||
let running = true
|
||||
@@ -548,39 +574,55 @@ fn scan_interp_string(chars: [String], start: Int, total: Int) -> Map<String, An
|
||||
if i >= total {
|
||||
let running = false
|
||||
} else {
|
||||
let ch: String = native_list_get(chars, i)
|
||||
let c: Int = str_char_code(src, i)
|
||||
|
||||
if ch == "\\" {
|
||||
// Escape sequence
|
||||
if c == 92 {
|
||||
// '\\' = 92 — escape sequence: flush clean run, append resolved char
|
||||
if clean_start < i {
|
||||
let cur_parts = native_list_append(cur_parts, str_slice(src, clean_start, i))
|
||||
}
|
||||
let next_i = i + 1
|
||||
if next_i < total {
|
||||
let next_ch: String = native_list_get(chars, next_i)
|
||||
if next_ch == "$" {
|
||||
// \$ => literal '$' (escape for interpolation syntax)
|
||||
let cur_part = native_list_append(cur_part, "$")
|
||||
let nc: Int = str_char_code(src, next_i)
|
||||
if nc == 36 {
|
||||
// '\$' => literal '$' (36 = '$')
|
||||
let cur_parts = native_list_append(cur_parts, "$")
|
||||
let clean_start = next_i + 1
|
||||
let i = next_i + 1
|
||||
} else {
|
||||
if next_ch == "\"" {
|
||||
let cur_part = native_list_append(cur_part, "\"")
|
||||
if nc == 34 {
|
||||
// '\"' => literal '"' (34 = '"')
|
||||
let cur_parts = native_list_append(cur_parts, "\"")
|
||||
let clean_start = next_i + 1
|
||||
let i = next_i + 1
|
||||
} else {
|
||||
if next_ch == "n" {
|
||||
let cur_part = native_list_append(cur_part, "\n")
|
||||
if nc == 110 {
|
||||
// '\n' (110 = 'n')
|
||||
let cur_parts = native_list_append(cur_parts, "\n")
|
||||
let clean_start = next_i + 1
|
||||
let i = next_i + 1
|
||||
} else {
|
||||
if next_ch == "t" {
|
||||
let cur_part = native_list_append(cur_part, "\t")
|
||||
if nc == 116 {
|
||||
// '\t' (116 = 't')
|
||||
let cur_parts = native_list_append(cur_parts, "\t")
|
||||
let clean_start = next_i + 1
|
||||
let i = next_i + 1
|
||||
} else {
|
||||
if next_ch == "r" {
|
||||
let cur_part = native_list_append(cur_part, "\r")
|
||||
if nc == 114 {
|
||||
// '\r' (114 = 'r')
|
||||
let cur_parts = native_list_append(cur_parts, "\r")
|
||||
let clean_start = next_i + 1
|
||||
let i = next_i + 1
|
||||
} else {
|
||||
if next_ch == "\\" {
|
||||
let cur_part = native_list_append(cur_part, "\\")
|
||||
if nc == 92 {
|
||||
// '\\' (92)
|
||||
let cur_parts = native_list_append(cur_parts, "\\")
|
||||
let clean_start = next_i + 1
|
||||
let i = next_i + 1
|
||||
} else {
|
||||
let cur_part = native_list_append(cur_part, next_ch)
|
||||
// Unknown escape: emit the escaped char verbatim
|
||||
let cur_parts = native_list_append(cur_parts, str_slice(src, next_i, next_i + 1))
|
||||
let clean_start = next_i + 1
|
||||
let i = next_i + 1
|
||||
}
|
||||
}
|
||||
@@ -589,75 +631,85 @@ fn scan_interp_string(chars: [String], start: Int, total: Int) -> Map<String, An
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let i = i + 1
|
||||
let clean_start = next_i
|
||||
let i = next_i
|
||||
}
|
||||
} else {
|
||||
if ch == "\"" {
|
||||
// Closing quote - stop scanning
|
||||
if c == 34 {
|
||||
// '"' = 34 — closing quote: flush clean run, stop
|
||||
if clean_start < i {
|
||||
let cur_parts = native_list_append(cur_parts, str_slice(src, clean_start, i))
|
||||
}
|
||||
let i = i + 1
|
||||
let clean_start = i
|
||||
let running = false
|
||||
} else {
|
||||
if ch == "$" {
|
||||
// Check for ${ (start of interpolation)
|
||||
if c == 36 {
|
||||
// '$' = 36 — possible interpolation start
|
||||
let next_i = i + 1
|
||||
let is_interp = false
|
||||
if next_i < total {
|
||||
let next_ch: String = native_list_get(chars, next_i)
|
||||
if next_ch == "{" {
|
||||
let nc2: Int = str_char_code(src, next_i)
|
||||
if nc2 == 123 {
|
||||
// '{' = 123
|
||||
let is_interp = true
|
||||
}
|
||||
}
|
||||
if is_interp {
|
||||
// Flush the accumulated literal part (if non-empty)
|
||||
let part_len: Int = native_list_len(cur_part)
|
||||
if clean_start < i {
|
||||
let cur_parts = native_list_append(cur_parts, str_slice(src, clean_start, i))
|
||||
}
|
||||
let part_len: Int = native_list_len(cur_parts)
|
||||
if part_len > 0 {
|
||||
let part_text = str_join(cur_part, "")
|
||||
let part_text = str_join(cur_parts, "")
|
||||
if need_plus {
|
||||
let out_tokens = native_list_append(out_tokens, make_tok("Plus", "+"))
|
||||
let out_tokens = tok_append(out_tokens, "Plus", "+")
|
||||
}
|
||||
let clean_part = part_text
|
||||
if looks_like_code(part_text) {
|
||||
let clean_part = strip_code_comments(part_text)
|
||||
}
|
||||
let out_tokens = native_list_append(out_tokens, make_tok("Str", clean_part))
|
||||
let out_tokens = tok_append(out_tokens, "Str", clean_part)
|
||||
let need_plus = true
|
||||
}
|
||||
let cur_part = native_list_empty()
|
||||
let cur_parts = native_list_empty()
|
||||
let has_interp = true
|
||||
|
||||
// Scan brace-balanced expression source
|
||||
let brace_result = scan_interp_brace(chars, next_i + 1, total)
|
||||
let brace_result = scan_interp_brace(src, next_i + 1, total)
|
||||
let expr_src: String = brace_result["text"]
|
||||
let new_i: Int = brace_result["pos"]
|
||||
let i = new_i
|
||||
let clean_start = new_i
|
||||
|
||||
// Re-lex the expression and inline the tokens.
|
||||
// Wrap in ( ) so that operators inside ${} (e.g.
|
||||
// age + 1) are parsed as a grouped sub-expression
|
||||
// rather than merging with the surrounding concat
|
||||
// Plus tokens at the wrong precedence level.
|
||||
let inner_toks: [Map<String, Any>] = lex(expr_src)
|
||||
let inner_toks: [Any] = lex(expr_src)
|
||||
let inner_len: Int = native_list_len(inner_toks)
|
||||
|
||||
if need_plus {
|
||||
let out_tokens = native_list_append(out_tokens, make_tok("Plus", "+"))
|
||||
let out_tokens = tok_append(out_tokens, "Plus", "+")
|
||||
}
|
||||
// Empty interpolation ${} => empty string segment
|
||||
if inner_len <= 1 {
|
||||
let out_tokens = native_list_append(out_tokens, make_tok("Str", ""))
|
||||
// inner_len <= 2 = only the Eof pair (kind="Eof", value="")
|
||||
if inner_len <= 2 {
|
||||
let out_tokens = tok_append(out_tokens, "Str", "")
|
||||
} else {
|
||||
let out_tokens = native_list_append(out_tokens, make_tok("LParen", "("))
|
||||
let out_tokens = tok_append(out_tokens, "LParen", "(")
|
||||
let out_tokens = interp_tokens_append_all(out_tokens, inner_toks)
|
||||
let out_tokens = native_list_append(out_tokens, make_tok("RParen", ")"))
|
||||
let out_tokens = tok_append(out_tokens, "RParen", ")")
|
||||
}
|
||||
let need_plus = true
|
||||
} else {
|
||||
// Plain '$' not followed by '{' - treat as literal
|
||||
let cur_part = native_list_append(cur_part, "$")
|
||||
// Plain '$' not followed by '{' - treat as literal, continue clean run
|
||||
let i = i + 1
|
||||
}
|
||||
} else {
|
||||
let cur_part = native_list_append(cur_part, ch)
|
||||
// Plain char — extends clean run, no append needed
|
||||
let i = i + 1
|
||||
}
|
||||
}
|
||||
@@ -666,8 +718,11 @@ fn scan_interp_string(chars: [String], start: Int, total: Int) -> Map<String, An
|
||||
}
|
||||
|
||||
// Flush remaining literal segment and build final token list
|
||||
let part_text = str_join(cur_part, "")
|
||||
let part_len: Int = native_list_len(cur_part)
|
||||
if clean_start < i {
|
||||
let cur_parts = native_list_append(cur_parts, str_slice(src, clean_start, i))
|
||||
}
|
||||
let part_len: Int = native_list_len(cur_parts)
|
||||
let part_text = str_join(cur_parts, "")
|
||||
if has_interp {
|
||||
// Interpolated string: only emit trailing segment if non-empty
|
||||
if part_len > 0 {
|
||||
@@ -676,9 +731,9 @@ fn scan_interp_string(chars: [String], start: Int, total: Int) -> Map<String, An
|
||||
let clean_part = strip_code_comments(part_text)
|
||||
}
|
||||
if need_plus {
|
||||
let out_tokens = native_list_append(out_tokens, make_tok("Plus", "+"))
|
||||
let out_tokens = tok_append(out_tokens, "Plus", "+")
|
||||
}
|
||||
let out_tokens = native_list_append(out_tokens, make_tok("Str", clean_part))
|
||||
let out_tokens = tok_append(out_tokens, "Str", clean_part)
|
||||
}
|
||||
} else {
|
||||
// Plain string with no interpolation - same behaviour as old scan_string
|
||||
@@ -686,42 +741,51 @@ fn scan_interp_string(chars: [String], start: Int, total: Int) -> Map<String, An
|
||||
if looks_like_code(part_text) {
|
||||
let clean_text = strip_code_comments(part_text)
|
||||
}
|
||||
let out_tokens = native_list_append(out_tokens, make_tok("Str", clean_text))
|
||||
let out_tokens = tok_append(out_tokens, "Str", clean_text)
|
||||
}
|
||||
|
||||
{ "tokens": out_tokens, "pos": i }
|
||||
}
|
||||
|
||||
// -- Main lexer ----------------------------------------------------------------
|
||||
// Char code constants (avoids strdup for single-char comparison)
|
||||
// '/' = 47, '"' = 34, '0'-'9' = 48-57, 'a'-'z' = 97-122, 'A'-'Z' = 65-90
|
||||
// '_' = 95, ' '=32, '\t'=9, '\n'=10, '\r'=13
|
||||
// '=' = 61, '!' = 33, '<' = 60, '>' = 62, '&' = 38, '|' = 124
|
||||
// '-' = 45, ':' = 58, '+' = 43, '*' = 42, '%' = 37
|
||||
// '(' = 40, ')' = 41, '{' = 123, '}' = 125, '[' = 91, ']' = 93
|
||||
// ',' = 44, '.' = 46, ';' = 59, '@' = 64, '?' = 63
|
||||
|
||||
fn lex(source: String) -> [Map<String, Any>] {
|
||||
let chars: [String] = native_string_chars(source)
|
||||
let total: Int = native_list_len(chars)
|
||||
let tokens: [Map<String, Any>] = native_list_empty()
|
||||
fn lex(source: String) -> [Any] {
|
||||
// Use str_char_code (returns Int) instead of str_char_at (returns strdup String)
|
||||
// for all character classification in the hot loop. For a 400KB source,
|
||||
// str_char_at allocates ~400K × 16B = ~6.4MB of temporary strings.
|
||||
let total: Int = str_len(source)
|
||||
let tokens: [Any] = native_list_empty()
|
||||
let i: Int = 0
|
||||
|
||||
while i < total {
|
||||
let ch: String = native_list_get(chars, i)
|
||||
let c: Int = str_char_code(source, i)
|
||||
|
||||
// Skip whitespace
|
||||
if lex_is_whitespace(ch) {
|
||||
// Skip whitespace (space=32, tab=9, newline=10, CR=13)
|
||||
if is_ws_code(c) {
|
||||
let i = i + 1
|
||||
} else {
|
||||
// Line comments: //
|
||||
if ch == "/" {
|
||||
// Line comments: // (slash=47)
|
||||
if c == 47 {
|
||||
let next_i = i + 1
|
||||
if next_i < total {
|
||||
let next_ch: String = native_list_get(chars, next_i)
|
||||
if next_ch == "/" {
|
||||
// skip to end of line
|
||||
let nc: Int = str_char_code(source, next_i)
|
||||
if nc == 47 {
|
||||
// skip to end of line (newline=10)
|
||||
let i = i + 2
|
||||
let running2 = true
|
||||
while running2 {
|
||||
if i >= total {
|
||||
let running2 = false
|
||||
} else {
|
||||
let lch: String = native_list_get(chars, i)
|
||||
if lch == "\n" {
|
||||
let lc: Int = str_char_code(source, i)
|
||||
if lc == 10 {
|
||||
let running2 = false
|
||||
} else {
|
||||
let i = i + 1
|
||||
@@ -729,232 +793,254 @@ fn lex(source: String) -> [Map<String, Any>] {
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let tokens = native_list_append(tokens, make_tok("Slash", "/"))
|
||||
let tokens = tok_append(tokens, "Slash", "/")
|
||||
let i = i + 1
|
||||
}
|
||||
} else {
|
||||
let tokens = native_list_append(tokens, make_tok("Slash", "/"))
|
||||
let tokens = tok_append(tokens, "Slash", "/")
|
||||
let i = i + 1
|
||||
}
|
||||
} else {
|
||||
// String literal (plain or interpolated with ${expr} syntax).
|
||||
// scan_interp_string handles both cases: plain strings emit a
|
||||
// single Str token; interpolated strings emit a flat token
|
||||
// sequence (Str Plus expr-tokens Plus Str ...) that the parser
|
||||
// naturally assembles into a BinOp concat tree.
|
||||
if ch == "\"" {
|
||||
let interp_result = scan_interp_string(chars, i + 1, total)
|
||||
let interp_toks: [Map<String, Any>] = interp_result["tokens"]
|
||||
// String literal: '"' = 34
|
||||
if c == 34 {
|
||||
let interp_result = scan_interp_string(source, i + 1, total)
|
||||
let interp_toks: [Any] = interp_result["tokens"]
|
||||
let new_pos: Int = interp_result["pos"]
|
||||
let tokens = interp_tokens_append_all(tokens, interp_toks)
|
||||
let i = new_pos
|
||||
} else {
|
||||
// Number literal
|
||||
if lex_is_digit(ch) {
|
||||
let result = scan_digits(chars, i, total)
|
||||
// Number literal: '0'-'9' = 48-57
|
||||
if is_digit_code(c) {
|
||||
let result = scan_digits(source, i, total)
|
||||
let num_text: String = result["text"]
|
||||
let new_pos: Int = result["pos"]
|
||||
// check for float (dot followed by digit)
|
||||
// check for float (dot=46 followed by digit)
|
||||
if new_pos < total {
|
||||
let dot_ch: String = native_list_get(chars, new_pos)
|
||||
if dot_ch == "." {
|
||||
let dc: Int = str_char_code(source, new_pos)
|
||||
if dc == 46 {
|
||||
let after_dot = new_pos + 1
|
||||
if after_dot < total {
|
||||
let after_dot_ch: String = native_list_get(chars, after_dot)
|
||||
if lex_is_digit(after_dot_ch) {
|
||||
let frac_result = scan_digits(chars, after_dot, total)
|
||||
let adc: Int = str_char_code(source, after_dot)
|
||||
if is_digit_code(adc) {
|
||||
let frac_result = scan_digits(source, after_dot, total)
|
||||
let frac_text: String = frac_result["text"]
|
||||
let frac_pos: Int = frac_result["pos"]
|
||||
let tokens = native_list_append(tokens, make_tok("Float", num_text + "." + frac_text))
|
||||
let tokens = tok_append(tokens, "Float", num_text + "." + frac_text)
|
||||
let i = frac_pos
|
||||
} else {
|
||||
let tokens = native_list_append(tokens, make_tok("Int", num_text))
|
||||
let tokens = tok_append(tokens, "Int", num_text)
|
||||
let i = new_pos
|
||||
}
|
||||
} else {
|
||||
let tokens = native_list_append(tokens, make_tok("Int", num_text))
|
||||
let tokens = tok_append(tokens, "Int", num_text)
|
||||
let i = new_pos
|
||||
}
|
||||
} else {
|
||||
let tokens = native_list_append(tokens, make_tok("Int", num_text))
|
||||
let tokens = tok_append(tokens, "Int", num_text)
|
||||
let i = new_pos
|
||||
}
|
||||
} else {
|
||||
let tokens = native_list_append(tokens, make_tok("Int", num_text))
|
||||
let tokens = tok_append(tokens, "Int", num_text)
|
||||
let i = new_pos
|
||||
}
|
||||
} else {
|
||||
// Identifier or keyword
|
||||
if lex_is_alpha(ch) || ch == "_" {
|
||||
let result = scan_ident(chars, i, total)
|
||||
// Identifier or keyword: alpha or '_'=95
|
||||
if is_alpha_code(c) || c == 95 {
|
||||
let result = scan_ident(source, i, total)
|
||||
let word: String = result["text"]
|
||||
let new_pos: Int = result["pos"]
|
||||
let kw = keyword_kind(word)
|
||||
if kw == "" {
|
||||
let tokens = native_list_append(tokens, make_tok("Ident", word))
|
||||
let tokens = tok_append(tokens, "Ident", word)
|
||||
} else {
|
||||
let tokens = native_list_append(tokens, make_tok(kw, word))
|
||||
let tokens = tok_append(tokens, kw, word)
|
||||
}
|
||||
let i = new_pos
|
||||
} else {
|
||||
// Multi-char and single-char operators/delimiters
|
||||
let peek_i = i + 1
|
||||
let peek_ch = ""
|
||||
let peek_c: Int = -1
|
||||
if peek_i < total {
|
||||
let peek_ch: String = native_list_get(chars, peek_i)
|
||||
let peek_c: Int = str_char_code(source, peek_i)
|
||||
}
|
||||
|
||||
if ch == "=" {
|
||||
if peek_ch == "=" {
|
||||
let tokens = native_list_append(tokens, make_tok("EqEq", "=="))
|
||||
if c == 61 {
|
||||
// '=' = 61
|
||||
if peek_c == 61 {
|
||||
let tokens = tok_append(tokens, "EqEq", "==")
|
||||
let i = i + 2
|
||||
} else {
|
||||
if peek_ch == ">" {
|
||||
let tokens = native_list_append(tokens, make_tok("FatArrow", "=>"))
|
||||
if peek_c == 62 {
|
||||
// '>' = 62
|
||||
let tokens = tok_append(tokens, "FatArrow", "=>")
|
||||
let i = i + 2
|
||||
} else {
|
||||
let tokens = native_list_append(tokens, make_tok("Eq", "="))
|
||||
let tokens = tok_append(tokens, "Eq", "=")
|
||||
let i = i + 1
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if ch == "!" {
|
||||
if peek_ch == "=" {
|
||||
let tokens = native_list_append(tokens, make_tok("NotEq", "!="))
|
||||
if c == 33 {
|
||||
// '!' = 33
|
||||
if peek_c == 61 {
|
||||
let tokens = tok_append(tokens, "NotEq", "!=")
|
||||
let i = i + 2
|
||||
} else {
|
||||
let tokens = native_list_append(tokens, make_tok("Not", "!"))
|
||||
let tokens = tok_append(tokens, "Not", "!")
|
||||
let i = i + 1
|
||||
}
|
||||
} else {
|
||||
if ch == "<" {
|
||||
if peek_ch == "=" {
|
||||
let tokens = native_list_append(tokens, make_tok("LtEq", "<="))
|
||||
if c == 60 {
|
||||
// '<' = 60
|
||||
if peek_c == 61 {
|
||||
let tokens = tok_append(tokens, "LtEq", "<=")
|
||||
let i = i + 2
|
||||
} else {
|
||||
let tokens = native_list_append(tokens, make_tok("Lt", "<"))
|
||||
let tokens = tok_append(tokens, "Lt", "<")
|
||||
let i = i + 1
|
||||
}
|
||||
} else {
|
||||
if ch == ">" {
|
||||
if peek_ch == "=" {
|
||||
let tokens = native_list_append(tokens, make_tok("GtEq", ">="))
|
||||
if c == 62 {
|
||||
// '>' = 62
|
||||
if peek_c == 61 {
|
||||
let tokens = tok_append(tokens, "GtEq", ">=")
|
||||
let i = i + 2
|
||||
} else {
|
||||
let tokens = native_list_append(tokens, make_tok("Gt", ">"))
|
||||
let tokens = tok_append(tokens, "Gt", ">")
|
||||
let i = i + 1
|
||||
}
|
||||
} else {
|
||||
if ch == "&" {
|
||||
if peek_ch == "&" {
|
||||
let tokens = native_list_append(tokens, make_tok("And", "&&"))
|
||||
if c == 38 {
|
||||
// '&' = 38
|
||||
if peek_c == 38 {
|
||||
let tokens = tok_append(tokens, "And", "&&")
|
||||
let i = i + 2
|
||||
} else {
|
||||
let i = i + 1
|
||||
}
|
||||
} else {
|
||||
if ch == "|" {
|
||||
if peek_ch == "|" {
|
||||
let tokens = native_list_append(tokens, make_tok("Or", "||"))
|
||||
if c == 124 {
|
||||
// '|' = 124
|
||||
if peek_c == 124 {
|
||||
let tokens = tok_append(tokens, "Or", "||")
|
||||
let i = i + 2
|
||||
} else {
|
||||
if peek_ch == ">" {
|
||||
let tokens = native_list_append(tokens, make_tok("PipeOp", "|>"))
|
||||
if peek_c == 62 {
|
||||
// '>' = 62
|
||||
let tokens = tok_append(tokens, "PipeOp", "|>")
|
||||
let i = i + 2
|
||||
} else {
|
||||
let tokens = native_list_append(tokens, make_tok("Pipe", "|"))
|
||||
let tokens = tok_append(tokens, "Pipe", "|")
|
||||
let i = i + 1
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if ch == "-" {
|
||||
if peek_ch == ">" {
|
||||
let tokens = native_list_append(tokens, make_tok("Arrow", "->"))
|
||||
if c == 45 {
|
||||
// '-' = 45
|
||||
if peek_c == 62 {
|
||||
// '>' = 62
|
||||
let tokens = tok_append(tokens, "Arrow", "->")
|
||||
let i = i + 2
|
||||
} else {
|
||||
let tokens = native_list_append(tokens, make_tok("Minus", "-"))
|
||||
let tokens = tok_append(tokens, "Minus", "-")
|
||||
let i = i + 1
|
||||
}
|
||||
} else {
|
||||
if ch == ":" {
|
||||
if peek_ch == ":" {
|
||||
let tokens = native_list_append(tokens, make_tok("ColonColon", "::"))
|
||||
if c == 58 {
|
||||
// ':' = 58
|
||||
if peek_c == 58 {
|
||||
let tokens = tok_append(tokens, "ColonColon", "::")
|
||||
let i = i + 2
|
||||
} else {
|
||||
let tokens = native_list_append(tokens, make_tok("Colon", ":"))
|
||||
let tokens = tok_append(tokens, "Colon", ":")
|
||||
let i = i + 1
|
||||
}
|
||||
} else {
|
||||
if ch == "+" {
|
||||
let tokens = native_list_append(tokens, make_tok("Plus", "+"))
|
||||
if c == 43 {
|
||||
// '+' = 43
|
||||
let tokens = tok_append(tokens, "Plus", "+")
|
||||
let i = i + 1
|
||||
} else {
|
||||
if ch == "*" {
|
||||
let tokens = native_list_append(tokens, make_tok("Star", "*"))
|
||||
if c == 42 {
|
||||
// '*' = 42
|
||||
let tokens = tok_append(tokens, "Star", "*")
|
||||
let i = i + 1
|
||||
} else {
|
||||
if ch == "%" {
|
||||
let tokens = native_list_append(tokens, make_tok("Percent", "%"))
|
||||
if c == 37 {
|
||||
// '%' = 37
|
||||
let tokens = tok_append(tokens, "Percent", "%")
|
||||
let i = i + 1
|
||||
} else {
|
||||
if ch == "(" {
|
||||
let tokens = native_list_append(tokens, make_tok("LParen", "("))
|
||||
if c == 40 {
|
||||
// '(' = 40
|
||||
let tokens = tok_append(tokens, "LParen", "(")
|
||||
let i = i + 1
|
||||
} else {
|
||||
if ch == ")" {
|
||||
let tokens = native_list_append(tokens, make_tok("RParen", ")"))
|
||||
if c == 41 {
|
||||
// ')' = 41
|
||||
let tokens = tok_append(tokens, "RParen", ")")
|
||||
let i = i + 1
|
||||
} else {
|
||||
if ch == "{" {
|
||||
let tokens = native_list_append(tokens, make_tok("LBrace", "{"))
|
||||
if c == 123 {
|
||||
// '{' = 123
|
||||
let tokens = tok_append(tokens, "LBrace", "{")
|
||||
let i = i + 1
|
||||
} else {
|
||||
if ch == "}" {
|
||||
let tokens = native_list_append(tokens, make_tok("RBrace", "}"))
|
||||
if c == 125 {
|
||||
// '}' = 125
|
||||
let tokens = tok_append(tokens, "RBrace", "}")
|
||||
let i = i + 1
|
||||
} else {
|
||||
if ch == "[" {
|
||||
let tokens = native_list_append(tokens, make_tok("LBracket", "["))
|
||||
if c == 91 {
|
||||
// '[' = 91
|
||||
let tokens = tok_append(tokens, "LBracket", "[")
|
||||
let i = i + 1
|
||||
} else {
|
||||
if ch == "]" {
|
||||
let tokens = native_list_append(tokens, make_tok("RBracket", "]"))
|
||||
if c == 93 {
|
||||
// ']' = 93
|
||||
let tokens = tok_append(tokens, "RBracket", "]")
|
||||
let i = i + 1
|
||||
} else {
|
||||
if ch == "," {
|
||||
let tokens = native_list_append(tokens, make_tok("Comma", ","))
|
||||
if c == 44 {
|
||||
// ',' = 44
|
||||
let tokens = tok_append(tokens, "Comma", ",")
|
||||
let i = i + 1
|
||||
} else {
|
||||
if ch == "." {
|
||||
// Check for ..= (inclusive range) before .. (exclusive range) before single .
|
||||
if c == 46 {
|
||||
// '.' = 46: check for ..= or ..
|
||||
let peek2_i = i + 2
|
||||
let peek2_ch = ""
|
||||
let peek2_c: Int = -1
|
||||
if peek2_i < total {
|
||||
let peek2_ch: String = native_list_get(chars, peek2_i)
|
||||
let peek2_c: Int = str_char_code(source, peek2_i)
|
||||
}
|
||||
if peek_ch == "." {
|
||||
if peek2_ch == "=" {
|
||||
let tokens = native_list_append(tokens, make_tok("DotDotEq", "..="))
|
||||
if peek_c == 46 {
|
||||
// '..' prefix
|
||||
if peek2_c == 61 {
|
||||
// '..=' = 46 46 61
|
||||
let tokens = tok_append(tokens, "DotDotEq", "..=")
|
||||
let i = i + 3
|
||||
} else {
|
||||
let tokens = native_list_append(tokens, make_tok("DotDot", ".."))
|
||||
let tokens = tok_append(tokens, "DotDot", "..")
|
||||
let i = i + 2
|
||||
}
|
||||
} else {
|
||||
let tokens = native_list_append(tokens, make_tok("Dot", "."))
|
||||
let tokens = tok_append(tokens, "Dot", ".")
|
||||
let i = i + 1
|
||||
}
|
||||
} else {
|
||||
if ch == ";" {
|
||||
let tokens = native_list_append(tokens, make_tok("Semicolon", ";"))
|
||||
if c == 59 {
|
||||
// ';' = 59
|
||||
let tokens = tok_append(tokens, "Semicolon", ";")
|
||||
let i = i + 1
|
||||
} else {
|
||||
if ch == "@" {
|
||||
let tokens = native_list_append(tokens, make_tok("At", "@"))
|
||||
if c == 64 {
|
||||
// '@' = 64
|
||||
let tokens = tok_append(tokens, "At", "@")
|
||||
let i = i + 1
|
||||
} else {
|
||||
if ch == "?" {
|
||||
let tokens = native_list_append(tokens, make_tok("QuestionMark", "?"))
|
||||
if c == 63 {
|
||||
// '?' = 63
|
||||
let tokens = tok_append(tokens, "QuestionMark", "?")
|
||||
let i = i + 1
|
||||
} else {
|
||||
// unknown char - skip
|
||||
@@ -988,6 +1074,6 @@ fn lex(source: String) -> [Map<String, Any>] {
|
||||
}
|
||||
}
|
||||
|
||||
let tokens = native_list_append(tokens, make_tok("Eof", ""))
|
||||
let tokens = tok_append(tokens, "Eof", "")
|
||||
tokens
|
||||
}
|
||||
|
||||
@@ -9,25 +9,28 @@
|
||||
// The token list is passed as a parameter to all parse functions.
|
||||
// native_list_get is used to index into it without cloning.
|
||||
//
|
||||
// Entry point: fn parse(tokens: [Map<String, Any>]) -> [Map<String, Any>]
|
||||
// Entry point: fn parse(tokens: [Any]) -> [Map<String, Any>]
|
||||
|
||||
// -- Token access helpers ------------------------------------------------------
|
||||
// Tokens is a flat [Any] list: tokens[2*i] = kind, tokens[2*i+1] = value.
|
||||
// This avoids one ElMap allocation per token (~112B each), saving ~4MB on large
|
||||
// programs. All callers use these helpers -- only these three need updating.
|
||||
|
||||
fn tok_at(tokens: [Map<String, Any>], pos: Int) -> Map<String, Any> {
|
||||
native_list_get(tokens, pos)
|
||||
fn tok_at(tokens: [Any], pos: Int) -> Map<String, Any> {
|
||||
let kind: String = native_list_get(tokens, pos * 2)
|
||||
let value: String = native_list_get(tokens, pos * 2 + 1)
|
||||
{ "kind": kind, "value": value }
|
||||
}
|
||||
|
||||
fn tok_kind(tokens: [Map<String, Any>], pos: Int) -> String {
|
||||
let t = native_list_get(tokens, pos)
|
||||
t["kind"]
|
||||
fn tok_kind(tokens: [Any], pos: Int) -> String {
|
||||
native_list_get(tokens, pos * 2)
|
||||
}
|
||||
|
||||
fn tok_value(tokens: [Map<String, Any>], pos: Int) -> String {
|
||||
let t = native_list_get(tokens, pos)
|
||||
t["value"]
|
||||
fn tok_value(tokens: [Any], pos: Int) -> String {
|
||||
native_list_get(tokens, pos * 2 + 1)
|
||||
}
|
||||
|
||||
fn expect(tokens: [Map<String, Any>], pos: Int, kind: String) -> Int {
|
||||
fn expect(tokens: [Any], pos: Int, kind: String) -> Int {
|
||||
let k = tok_kind(tokens, pos)
|
||||
if k == kind {
|
||||
return pos + 1
|
||||
@@ -46,7 +49,7 @@ fn make_result(node: Map<String, Any>, pos: Int) -> Map<String, Any> {
|
||||
// Skips over a type annotation, returning the new position.
|
||||
// Types can be: Ident, [Type], Map<K,V>, Type?, Type<Type,...>
|
||||
|
||||
fn skip_type(tokens: [Map<String, Any>], pos: Int) -> Int {
|
||||
fn skip_type(tokens: [Any], pos: Int) -> Int {
|
||||
let k = tok_kind(tokens, pos)
|
||||
// Array type: [Type]
|
||||
if k == "LBracket" {
|
||||
@@ -103,7 +106,7 @@ fn skip_type(tokens: [Map<String, Any>], pos: Int) -> Int {
|
||||
// -- Parameter list ------------------------------------------------------------
|
||||
// Parses (name: Type, name: Type, ...) - returns { "params": [...], "pos": ... }
|
||||
|
||||
fn parse_params(tokens: [Map<String, Any>], pos: Int) -> Map<String, Any> {
|
||||
fn parse_params(tokens: [Any], pos: Int) -> Map<String, Any> {
|
||||
let p = expect(tokens, pos, "LParen")
|
||||
let params: [Map<String, Any>] = native_list_empty()
|
||||
let running = true
|
||||
@@ -292,7 +295,7 @@ fn is_void_element(name: String) -> Bool {
|
||||
|
||||
// Collect tokens as text content until we hit Lt, LBrace, Eof, or a
|
||||
// closing-tag marker (Lt Slash). Returns { "text": "...", "pos": p }
|
||||
fn parse_html_text_tokens(tokens: [Map<String, Any>], pos: Int) -> Map<String, Any> {
|
||||
fn parse_html_text_tokens(tokens: [Any], pos: Int) -> Map<String, Any> {
|
||||
let parts: [String] = native_list_empty()
|
||||
let p = pos
|
||||
let running = true
|
||||
@@ -322,7 +325,7 @@ fn parse_html_text_tokens(tokens: [Map<String, Any>], pos: Int) -> Map<String, A
|
||||
|
||||
// Parse an attribute list: (attrname | attrname="val" | attrname={expr})*
|
||||
// Stops at Gt or Slash (for self-closing />).
|
||||
fn parse_html_attrs(tokens: [Map<String, Any>], pos: Int) -> Map<String, Any> {
|
||||
fn parse_html_attrs(tokens: [Any], pos: Int) -> Map<String, Any> {
|
||||
let attrs: [Map<String, Any>] = native_list_empty()
|
||||
let p = pos
|
||||
let running = true
|
||||
@@ -374,7 +377,7 @@ fn parse_html_attrs(tokens: [Map<String, Any>], pos: Int) -> Map<String, Any> {
|
||||
|
||||
// Parse the children of an HTML element until we see the closing tag </tag>
|
||||
// or EOF. Returns { "children": [...], "pos": p_after_closing_tag }
|
||||
fn parse_html_children(tokens: [Map<String, Any>], pos: Int, parent_tag: String) -> Map<String, Any> {
|
||||
fn parse_html_children(tokens: [Any], pos: Int, parent_tag: String) -> Map<String, Any> {
|
||||
let children: [Map<String, Any>] = native_list_empty()
|
||||
let p = pos
|
||||
let running = true
|
||||
@@ -514,14 +517,14 @@ fn parse_html_children(tokens: [Map<String, Any>], pos: Int, parent_tag: String)
|
||||
|
||||
// Parse body of {#each} until {/each}. Mirrors parse_html_children but
|
||||
// stops at the {/each} sentinel rather than a closing element tag.
|
||||
fn parse_html_each_body(tokens: [Map<String, Any>], pos: Int) -> Map<String, Any> {
|
||||
fn parse_html_each_body(tokens: [Any], pos: Int) -> Map<String, Any> {
|
||||
parse_html_children(tokens, pos, "__each__")
|
||||
}
|
||||
|
||||
// Parse a single HTML element: <tag attrs> children </tag>
|
||||
// or self-closing: <tag attrs/>
|
||||
// Pos points to the Lt token.
|
||||
fn parse_html_element(tokens: [Map<String, Any>], pos: Int) -> Map<String, Any> {
|
||||
fn parse_html_element(tokens: [Any], pos: Int) -> Map<String, Any> {
|
||||
let p = pos
|
||||
// consume <
|
||||
let p = expect(tokens, p, "Lt")
|
||||
@@ -558,7 +561,7 @@ fn parse_html_element(tokens: [Map<String, Any>], pos: Int) -> Map<String, Any>
|
||||
// Entry point for HTML template parsing.
|
||||
// Pos points to Lt (or Lt Not for <!doctype>).
|
||||
// May parse an optional <!doctype html> prefix followed by the root element.
|
||||
fn parse_html_template(tokens: [Map<String, Any>], pos: Int) -> Map<String, Any> {
|
||||
fn parse_html_template(tokens: [Any], pos: Int) -> Map<String, Any> {
|
||||
let p = pos
|
||||
// Check for <!doctype html>
|
||||
let doctype = false
|
||||
@@ -596,7 +599,7 @@ fn parse_html_template(tokens: [Map<String, Any>], pos: Int) -> Map<String, Any>
|
||||
make_result({ "expr": "HtmlTemplate", "root": root_with_doctype }, p)
|
||||
}
|
||||
|
||||
fn parse_primary(tokens: [Map<String, Any>], pos: Int) -> Map<String, Any> {
|
||||
fn parse_primary(tokens: [Any], pos: Int) -> Map<String, Any> {
|
||||
let k = tok_kind(tokens, pos)
|
||||
let v = tok_value(tokens, pos)
|
||||
|
||||
@@ -819,7 +822,7 @@ fn parse_primary(tokens: [Map<String, Any>], pos: Int) -> Map<String, Any> {
|
||||
make_result({ "expr": "Nil" }, pos + 1)
|
||||
}
|
||||
|
||||
fn parse_if(tokens: [Map<String, Any>], pos: Int) -> Map<String, Any> {
|
||||
fn parse_if(tokens: [Any], pos: Int) -> Map<String, Any> {
|
||||
let p = expect(tokens, pos, "If")
|
||||
// Suppress Map-literal parsing in the cond so a stray `{` (the start
|
||||
// of the then-block) isn't gobbled as a Map.
|
||||
@@ -855,7 +858,7 @@ fn parse_if(tokens: [Map<String, Any>], pos: Int) -> Map<String, Any> {
|
||||
make_result({ "expr": "If", "cond": cond, "then": then_stmts, "else": else_stmts, "has_else": has_else }, p)
|
||||
}
|
||||
|
||||
fn parse_match(tokens: [Map<String, Any>], pos: Int) -> Map<String, Any> {
|
||||
fn parse_match(tokens: [Any], pos: Int) -> Map<String, Any> {
|
||||
let p = expect(tokens, pos, "Match")
|
||||
let prev_no_block: String = state_get("__no_block_expr")
|
||||
state_set("__no_block_expr", "1")
|
||||
@@ -895,7 +898,7 @@ fn parse_match(tokens: [Map<String, Any>], pos: Int) -> Map<String, Any> {
|
||||
make_result({ "expr": "Match", "subject": subject, "arms": arms }, p)
|
||||
}
|
||||
|
||||
fn parse_pattern(tokens: [Map<String, Any>], pos: Int) -> Map<String, Any> {
|
||||
fn parse_pattern(tokens: [Any], pos: Int) -> Map<String, Any> {
|
||||
let k = tok_kind(tokens, pos)
|
||||
if k == "Ident" {
|
||||
let v = tok_value(tokens, pos)
|
||||
@@ -924,7 +927,7 @@ fn parse_pattern(tokens: [Map<String, Any>], pos: Int) -> Map<String, Any> {
|
||||
make_result({ "pattern": "Wildcard" }, pos + 1)
|
||||
}
|
||||
|
||||
fn parse_for_expr(tokens: [Map<String, Any>], pos: Int) -> Map<String, Any> {
|
||||
fn parse_for_expr(tokens: [Any], pos: Int) -> Map<String, Any> {
|
||||
let p = expect(tokens, pos, "For")
|
||||
let item_name = tok_value(tokens, p)
|
||||
let p = p + 1
|
||||
@@ -941,7 +944,7 @@ fn parse_for_expr(tokens: [Map<String, Any>], pos: Int) -> Map<String, Any> {
|
||||
make_result({ "expr": "For", "item": item_name, "list": list_expr, "body": body }, p)
|
||||
}
|
||||
|
||||
fn parse_block(tokens: [Map<String, Any>], pos: Int) -> Map<String, Any> {
|
||||
fn parse_block(tokens: [Any], pos: Int) -> Map<String, Any> {
|
||||
let p = expect(tokens, pos, "LBrace")
|
||||
let stmts: [Map<String, Any>] = native_list_empty()
|
||||
let running = true
|
||||
@@ -998,7 +1001,7 @@ fn is_duration_unit(name: String) -> Bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn parse_postfix(tokens: [Map<String, Any>], pos: Int) -> Map<String, Any> {
|
||||
fn parse_postfix(tokens: [Any], pos: Int) -> Map<String, Any> {
|
||||
let r = parse_primary(tokens, pos)
|
||||
let node = r["node"]
|
||||
let p = r["pos"]
|
||||
@@ -1115,7 +1118,7 @@ fn is_binop(kind: String) -> Bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn parse_binop(tokens: [Map<String, Any>], pos: Int, min_prec: Int) -> Map<String, Any> {
|
||||
fn parse_binop(tokens: [Any], pos: Int, min_prec: Int) -> Map<String, Any> {
|
||||
let r = parse_postfix(tokens, pos)
|
||||
let left = r["node"]
|
||||
let p = r["pos"]
|
||||
@@ -1140,13 +1143,13 @@ fn parse_binop(tokens: [Map<String, Any>], pos: Int, min_prec: Int) -> Map<Strin
|
||||
make_result(left, p)
|
||||
}
|
||||
|
||||
fn parse_expr(tokens: [Map<String, Any>], pos: Int) -> Map<String, Any> {
|
||||
fn parse_expr(tokens: [Any], pos: Int) -> Map<String, Any> {
|
||||
parse_binop(tokens, pos, 1)
|
||||
}
|
||||
|
||||
// -- Statement parsing ---------------------------------------------------------
|
||||
|
||||
fn parse_stmt(tokens: [Map<String, Any>], pos: Int) -> Map<String, Any> {
|
||||
fn parse_stmt(tokens: [Any], pos: Int) -> Map<String, Any> {
|
||||
let k = tok_kind(tokens, pos)
|
||||
|
||||
// let binding
|
||||
@@ -1619,8 +1622,9 @@ fn parse_stmt(tokens: [Map<String, Any>], pos: Int) -> Map<String, Any> {
|
||||
|
||||
// -- Top-level parse ------------------------------------------------------------
|
||||
|
||||
fn parse(tokens: [Map<String, Any>]) -> [Map<String, Any>] {
|
||||
let total: Int = native_list_len(tokens)
|
||||
fn parse(tokens: [Any]) -> [Map<String, Any>] {
|
||||
// Flat list: 2 entries per token, so divide by 2 for token count.
|
||||
let total: Int = native_list_len(tokens) / 2
|
||||
let stmts: [Map<String, Any>] = native_list_empty()
|
||||
let pos: Int = 0
|
||||
let running = true
|
||||
|
||||
Reference in New Issue
Block a user