diff --git a/lang/el-compiler/src/codegen.el b/lang/el-compiler/src/codegen.el index 2cbd90c..69e9fec 100644 --- a/lang/el-compiler/src/codegen.el +++ b/lang/el-compiler/src/codegen.el @@ -38,10 +38,13 @@ fn is_hex_digit_byte(b: Int) -> Bool { } fn c_escape(s: String) -> String { - // Use index-based byte scanning via str_char_code(s, i) and str_char_at(s, i). - // This avoids native_string_chars + str_join, which corrupts high-byte (>= 0x80) - // characters because list_join's looks_like_string heuristic rejects strings - // whose first byte is >= 0x7F and emits them as decimal pointer values instead. + // Batch ASCII chars using str_slice instead of str_char_at per byte. + // Track clean_start: the beginning of the current run of bytes that need + // no escaping. On each special byte, flush the accumulated clean run via + // str_slice, then append the escape. This reduces parts-list appends from + // O(N) to O(K) where K = number of special bytes << N for normal strings. + // + // Special bytes: '"'=34, '\\'=92, '\n'=10, '\r'=13, '\t'=9, any byte>=128. // // IMPORTANT: after a \xNN hex escape, if the next byte is a hex digit // (0-9, a-f, A-F), we emit `""` to split the C string literal so the C @@ -51,46 +54,75 @@ fn c_escape(s: String) -> String { let total: Int = str_len(s) let parts: [String] = native_list_empty() let i: Int = 0 + let clean_start: Int = 0 let prev_was_hex_escape: Bool = false while i < total { let bval: Int = str_char_code(s, i) - // If the previous token was a \xNN escape and the current byte is a - // hex digit, insert an empty string literal ("") to break the escape. + // Handle the hex-escape split case first: if prev was \xNN and this + // byte is a hex digit, we must flush the clean run and insert "". + // (At this point clean_start == i since the previous special byte + // already reset it, so flush is a no-op unless something is pending.) if prev_was_hex_escape { if is_hex_digit_byte(bval) { + // Flush any accumulated clean bytes before the split marker. + if clean_start < i { + let parts = native_list_append(parts, str_slice(s, clean_start, i)) + } let parts = native_list_append(parts, "\"\"") + let clean_start = i } } let prev_was_hex_escape = false if bval == 34 { - // 34 = '"' + // 34 = '"' — flush clean run, then escape + if clean_start < i { + let parts = native_list_append(parts, str_slice(s, clean_start, i)) + } let parts = native_list_append(parts, "\\\"") + let clean_start = i + 1 } else { if bval == 92 { // 92 = '\\' + if clean_start < i { + let parts = native_list_append(parts, str_slice(s, clean_start, i)) + } let parts = native_list_append(parts, "\\\\") + let clean_start = i + 1 } else { if bval == 10 { // 10 = '\n' + if clean_start < i { + let parts = native_list_append(parts, str_slice(s, clean_start, i)) + } let parts = native_list_append(parts, "\\n") + let clean_start = i + 1 } else { if bval == 13 { // 13 = '\r' + if clean_start < i { + let parts = native_list_append(parts, str_slice(s, clean_start, i)) + } let parts = native_list_append(parts, "\\r") + let clean_start = i + 1 } else { if bval == 9 { // 9 = '\t' + if clean_start < i { + let parts = native_list_append(parts, str_slice(s, clean_start, i)) + } let parts = native_list_append(parts, "\\t") + let clean_start = i + 1 } else { if bval >= 128 { - // Escape non-ASCII bytes (>= 0x80) as \xNN so - // Clang does not misinterpret multi-byte UTF-8 - // sequences in C string literals. + // Non-ASCII: flush, then \xNN + if clean_start < i { + let parts = native_list_append(parts, str_slice(s, clean_start, i)) + } let parts = native_list_append(parts, "\\x" + byte_to_hex2(bval)) let prev_was_hex_escape = true - } else { - let parts = native_list_append(parts, str_char_at(s, i)) + let clean_start = i + 1 } + // else: plain ASCII — extends the current clean run (no append) } } } @@ -98,6 +130,10 @@ fn c_escape(s: String) -> String { } let i = i + 1 } + // Flush the final clean run if any + if clean_start < total { + let parts = native_list_append(parts, str_slice(s, clean_start, total)) + } str_join(parts, "") } diff --git a/lang/el-compiler/src/compiler.el b/lang/el-compiler/src/compiler.el index e74b38a..0df9c5b 100644 --- a/lang/el-compiler/src/compiler.el +++ b/lang/el-compiler/src/compiler.el @@ -21,7 +21,7 @@ import "codegen-js.el" // compile — full pipeline (C target): source string -> C source string fn compile(source: String) -> String { - let tokens: [Map] = lex(source) + let tokens: [Any] = lex(source) let stmts: [Map] = parse(tokens) // Token list is no longer needed after parsing — release it to free memory // before codegen allocates its own working data on large source files. @@ -31,7 +31,7 @@ fn compile(source: String) -> String { // compile_js — full pipeline (JS target, module mode): source string -> JS source string fn compile_js(source: String) -> String { - let tokens: [Map] = lex(source) + let tokens: [Any] = lex(source) let stmts: [Map] = parse(tokens) // Token list is no longer needed after parsing — release it to free memory. el_release(tokens) @@ -41,7 +41,7 @@ fn compile_js(source: String) -> String { // compile_js_with_bundle — JS target in bundle mode. // Reads el_runtime.js from runtime_path and inlines it inside an IIFE. fn compile_js_with_bundle(source: String, runtime_path: String) -> String { - let tokens: [Map] = lex(source) + let tokens: [Any] = lex(source) let stmts: [Map] = parse(tokens) el_release(tokens) let runtime_content: String = fs_read(runtime_path) @@ -501,7 +501,7 @@ fn main() -> Void { // (without inlining imports) and write out a .elh file alongside the .c. if do_emit_header { let raw_source: String = fs_read(src_path) - let hdr_tokens: [Map] = lex(raw_source) + let hdr_tokens: [Any] = lex(raw_source) let hdr_stmts: [Map] = parse(hdr_tokens) el_release(hdr_tokens) let hdr_path: String = str_slice(src_path, 0, str_len(src_path) - 3) + ".elh" diff --git a/lang/el-compiler/src/lexer.el b/lang/el-compiler/src/lexer.el index 504c9c6..48cf5fe 100644 --- a/lang/el-compiler/src/lexer.el +++ b/lang/el-compiler/src/lexer.el @@ -7,11 +7,50 @@ // // Entry point: fn lex(source: String) -> [Map] // -// Uses native_string_chars to split the source into a chars list, -// then indexes it with native_list_get - avoids O(N-) string cloning. +// Performance: the hot lexer loop uses str_char_code (returns Int) instead of +// str_char_at (returns strdup'd String) for character classification. +// For a 400KB source, str_char_at allocates ~400K × 16B = ~6.4MB of temporary +// strings for the `ch` variable alone. str_char_code avoids all that. -// -- Character helpers --------------------------------------------------------- +// -- Character helpers (Int-based, no string allocation) ---------------------- +// These operate on char codes (from str_char_code) instead of str_char_at, +// eliminating one strdup per character in the hot lexer loop. +fn is_digit_code(c: Int) -> Bool { + // '0'=48 .. '9'=57 + if c >= 48 { + if c <= 57 { return true } + } + false +} + +fn is_alpha_code(c: Int) -> Bool { + // 'A'=65..'Z'=90, 'a'=97..'z'=122 + if c >= 65 { + if c <= 90 { return true } + } + if c >= 97 { + if c <= 122 { return true } + } + false +} + +fn is_alnum_or_underscore_code(c: Int) -> Bool { + if is_digit_code(c) { return true } + if is_alpha_code(c) { return true } + if c == 95 { return true } // '_' + false +} + +fn is_ws_code(c: Int) -> Bool { + if c == 32 { return true } // ' ' + if c == 9 { return true } // '\t' + if c == 10 { return true } // '\n' + if c == 13 { return true } // '\r' + false +} + +// Legacy String-based helpers kept for scan_interp helpers that use str_char_at. fn lex_is_digit(ch: String) -> Bool { if ch == "0" { return true } if ch == "1" { return true } @@ -97,8 +136,11 @@ fn lex_is_whitespace(ch: String) -> Bool { false } -fn make_tok(kind: String, value: String) -> Map { - { "kind": kind, "value": value } +// tok_append — append a (kind, value) pair to a flat token list. +// Returns the updated list. Gamma combines flat-list + char-code for max savings. +fn tok_append(tokens: [Any], kind: String, value: String) -> [Any] { + let tokens = native_list_append(tokens, kind) + native_list_append(tokens, value) } // -- Keyword lookup ------------------------------------------------------------ @@ -157,45 +199,43 @@ fn keyword_kind(word: String) -> String { // scan_digits - advance i while chars[i] is a digit // Returns { "text": ..., "pos": i } -fn scan_digits(chars: [String], start: Int, total: Int) -> Map { +fn scan_digits(src: String, start: Int, total: Int) -> Map { let i = start - let parts: [String] = native_list_empty() let running = true while running { if i >= total { let running = false } else { - let ch: String = native_list_get(chars, i) - if lex_is_digit(ch) { - let parts = native_list_append(parts, ch) + let c: Int = str_char_code(src, i) + if is_digit_code(c) { let i = i + 1 } else { let running = false } } } - { "text": str_join(parts, ""), "pos": i } + // Use str_slice instead of building a parts list — O(1) allocation, O(n) copy. + { "text": str_slice(src, start, i), "pos": i } } // scan_ident - advance i while chars[i] is alphanumeric or underscore -fn scan_ident(chars: [String], start: Int, total: Int) -> Map { +fn scan_ident(src: String, start: Int, total: Int) -> Map { let i = start - let parts: [String] = native_list_empty() let running = true while running { if i >= total { let running = false } else { - let ch: String = native_list_get(chars, i) - if is_alnum_or_underscore(ch) { - let parts = native_list_append(parts, ch) + let c: Int = str_char_code(src, i) + if is_alnum_or_underscore_code(c) { let i = i + 1 } else { let running = false } } } - { "text": str_join(parts, ""), "pos": i } + // Use str_slice instead of building a parts list — O(1) allocation, O(n) copy. + { "text": str_slice(src, start, i), "pos": i } } // -- Code-bearing string detection + comment strip ---------------------------- @@ -208,34 +248,16 @@ fn scan_ident(chars: [String], start: Int, total: Int) -> Map { // looks_like_code - heuristic gate so we only strip strings that actually // embed JS or CSS. Plain prose, hex blobs, JSON, etc. pass through verbatim. -fn substr_at(chars: [String], start: Int, total: Int, needle: String) -> Bool { - let nchars: [String] = native_string_chars(needle) - let nlen: Int = native_list_len(nchars) +fn substr_at(src: String, start: Int, total: Int, needle: String) -> Bool { + let nlen: Int = str_len(needle) if start + nlen > total { return false } - let i = 0 - let matched = true - while i < nlen { - let a: String = native_list_get(chars, start + i) - let b: String = native_list_get(nchars, i) - if a == b { let i = i + 1 } else { let matched = false; let i = nlen } - } - matched + // Use str_slice comparison instead of char-by-char loop. + str_eq(str_slice(src, start, start + nlen), needle) } fn str_has(s: String, needle: String) -> Bool { - let chars: [String] = native_string_chars(s) - let total: Int = native_list_len(chars) - let i = 0 - let found = false - while i < total { - if substr_at(chars, i, total, needle) { - let found = true - let i = total - } else { - let i = i + 1 - } - } - found + // Use the built-in str_contains which is implemented in native C — O(n) single pass. + str_contains(s, needle) } fn looks_like_code(s: String) -> Bool { @@ -254,8 +276,7 @@ fn looks_like_code(s: String) -> Bool { // comment opener: if the char immediately before '/' is ':', emit the '/' // literally and advance one position. fn strip_code_comments(s: String) -> String { - let chars: [String] = native_string_chars(s) - let total: Int = native_list_len(chars) + let total: Int = str_len(s) let out_parts: [String] = native_list_empty() let i = 0 let in_squote = false @@ -263,7 +284,7 @@ fn strip_code_comments(s: String) -> String { let in_btick = false let prev = "" while i < total { - let ch: String = native_list_get(chars, i) + let ch: String = str_char_at(s, i) let in_js_string = false if in_squote { let in_js_string = true } if in_dquote { let in_js_string = true } @@ -275,7 +296,7 @@ fn strip_code_comments(s: String) -> String { let out_parts = native_list_append(out_parts, ch) let next_i = i + 1 if next_i < total { - let nc: String = native_list_get(chars, next_i) + let nc: String = str_char_at(s, next_i) let out_parts = native_list_append(out_parts, nc) let prev = nc let i = next_i + 1 @@ -304,7 +325,7 @@ fn strip_code_comments(s: String) -> String { let next_i = i + 1 let next_ch = "" if next_i < total { - let next_ch: String = native_list_get(chars, next_i) + let next_ch: String = str_char_at(s, next_i) } if ch == "/" { @@ -323,7 +344,7 @@ fn strip_code_comments(s: String) -> String { if i >= total { let scanning = false } else { - let lc: String = native_list_get(chars, i) + let lc: String = str_char_at(s, i) if lc == "\n" { let scanning = false } else { @@ -342,11 +363,11 @@ fn strip_code_comments(s: String) -> String { if i >= total { let scanning2 = false } else { - let bc: String = native_list_get(chars, i) + let bc: String = str_char_at(s, i) if bc == "*" { let after = i + 1 if after < total { - let nc2: String = native_list_get(chars, after) + let nc2: String = str_char_at(s, after) if nc2 == "/" { let i = after + 1 let scanning2 = false @@ -402,7 +423,7 @@ fn strip_code_comments(s: String) -> String { // scan_string - scan a quoted string literal, handling \" escapes. // Starts AFTER the opening quote. Returns { "text": content, "pos": i_after_close } -fn scan_string(chars: [String], start: Int, total: Int) -> Map { +fn scan_string(src: String, start: Int, total: Int) -> Map { let i = start let parts: [String] = native_list_empty() let running = true @@ -410,12 +431,12 @@ fn scan_string(chars: [String], start: Int, total: Int) -> Map { if i >= total { let running = false } else { - let ch: String = native_list_get(chars, i) + let ch: String = str_char_at(src, i) if ch == "\\" { // escape: peek next char let next_i = i + 1 if next_i < total { - let next_ch: String = native_list_get(chars, next_i) + let next_ch: String = str_char_at(src, next_i) if next_ch == "\"" { let parts = native_list_append(parts, "\"") let i = next_i + 1 @@ -465,19 +486,17 @@ fn scan_string(chars: [String], start: Int, total: Int) -> Map { // scan_interp_brace - scan from `start` (the char after `${`) to the matching // `}`, tracking brace depth so inner braces (e.g. fn calls, map literals) are // handled correctly. Returns { "text": inner_source, "pos": i_after_close }. -fn scan_interp_brace(chars: [String], start: Int, total: Int) -> Map { +fn scan_interp_brace(src: String, start: Int, total: Int) -> Map { let i = start - let parts: [String] = native_list_empty() let depth = 1 let running = true while running { if i >= total { let running = false } else { - let ch: String = native_list_get(chars, i) + let ch: String = str_char_at(src, i) if ch == "{" { let depth = depth + 1 - let parts = native_list_append(parts, ch) let i = i + 1 } else { if ch == "}" { @@ -487,33 +506,33 @@ fn scan_interp_brace(chars: [String], start: Int, total: Int) -> Map], src: [Map]) -> [Map] { +// interp_tokens_append_all - copy every (kind, value) pair from flat src list +// into flat dst list, skipping the trailing Eof pair that lex() always appends. +fn interp_tokens_append_all(dst: [Any], src: [Any]) -> [Any] { let src_len: Int = native_list_len(src) let j = 0 let result = dst while j < src_len { - let tok: Map = native_list_get(src, j) - let tk: String = tok["kind"] - if tk == "Eof" { + let kind: String = native_list_get(src, j) + if kind == "Eof" { let j = src_len } else { - let result = native_list_append(result, tok) - let j = j + 1 + let val: String = native_list_get(src, j + 1) + let result = native_list_append(result, kind) + let result = native_list_append(result, val) + let j = j + 2 } } result @@ -536,10 +555,17 @@ fn interp_tokens_append_all(dst: [Map], src: [Map]) -> // // Supported escape sequences: \" \n \t \r \\ \$ (literal dollar sign). // Nested quotes inside ${} are not supported; use a variable instead. -fn scan_interp_string(chars: [String], start: Int, total: Int) -> Map { +// +// Performance: uses str_char_code (Int) for all character dispatch, eliminating +// per-character strdup. Plain runs are batched into str_slice segments instead +// of accumulating single-char strings, reducing list appends from O(N) to O(K) +// where K = number of escape/special chars in the literal. +// Char codes: '\' = 92, '"' = 34, '$' = 36, '{' = 123 +fn scan_interp_string(src: String, start: Int, total: Int) -> Map { let i = start - let out_tokens: [Map] = native_list_empty() - let cur_part: [String] = native_list_empty() + let out_tokens: [Any] = native_list_empty() + let cur_parts: [String] = native_list_empty() + let clean_start = start let has_interp = false let need_plus = false let running = true @@ -548,39 +574,55 @@ fn scan_interp_string(chars: [String], start: Int, total: Int) -> Map= total { let running = false } else { - let ch: String = native_list_get(chars, i) + let c: Int = str_char_code(src, i) - if ch == "\\" { - // Escape sequence + if c == 92 { + // '\\' = 92 — escape sequence: flush clean run, append resolved char + if clean_start < i { + let cur_parts = native_list_append(cur_parts, str_slice(src, clean_start, i)) + } let next_i = i + 1 if next_i < total { - let next_ch: String = native_list_get(chars, next_i) - if next_ch == "$" { - // \$ => literal '$' (escape for interpolation syntax) - let cur_part = native_list_append(cur_part, "$") + let nc: Int = str_char_code(src, next_i) + if nc == 36 { + // '\$' => literal '$' (36 = '$') + let cur_parts = native_list_append(cur_parts, "$") + let clean_start = next_i + 1 let i = next_i + 1 } else { - if next_ch == "\"" { - let cur_part = native_list_append(cur_part, "\"") + if nc == 34 { + // '\"' => literal '"' (34 = '"') + let cur_parts = native_list_append(cur_parts, "\"") + let clean_start = next_i + 1 let i = next_i + 1 } else { - if next_ch == "n" { - let cur_part = native_list_append(cur_part, "\n") + if nc == 110 { + // '\n' (110 = 'n') + let cur_parts = native_list_append(cur_parts, "\n") + let clean_start = next_i + 1 let i = next_i + 1 } else { - if next_ch == "t" { - let cur_part = native_list_append(cur_part, "\t") + if nc == 116 { + // '\t' (116 = 't') + let cur_parts = native_list_append(cur_parts, "\t") + let clean_start = next_i + 1 let i = next_i + 1 } else { - if next_ch == "r" { - let cur_part = native_list_append(cur_part, "\r") + if nc == 114 { + // '\r' (114 = 'r') + let cur_parts = native_list_append(cur_parts, "\r") + let clean_start = next_i + 1 let i = next_i + 1 } else { - if next_ch == "\\" { - let cur_part = native_list_append(cur_part, "\\") + if nc == 92 { + // '\\' (92) + let cur_parts = native_list_append(cur_parts, "\\") + let clean_start = next_i + 1 let i = next_i + 1 } else { - let cur_part = native_list_append(cur_part, next_ch) + // Unknown escape: emit the escaped char verbatim + let cur_parts = native_list_append(cur_parts, str_slice(src, next_i, next_i + 1)) + let clean_start = next_i + 1 let i = next_i + 1 } } @@ -589,75 +631,85 @@ fn scan_interp_string(chars: [String], start: Int, total: Int) -> Map 0 { - let part_text = str_join(cur_part, "") + let part_text = str_join(cur_parts, "") if need_plus { - let out_tokens = native_list_append(out_tokens, make_tok("Plus", "+")) + let out_tokens = tok_append(out_tokens, "Plus", "+") } let clean_part = part_text if looks_like_code(part_text) { let clean_part = strip_code_comments(part_text) } - let out_tokens = native_list_append(out_tokens, make_tok("Str", clean_part)) + let out_tokens = tok_append(out_tokens, "Str", clean_part) let need_plus = true } - let cur_part = native_list_empty() + let cur_parts = native_list_empty() let has_interp = true // Scan brace-balanced expression source - let brace_result = scan_interp_brace(chars, next_i + 1, total) + let brace_result = scan_interp_brace(src, next_i + 1, total) let expr_src: String = brace_result["text"] let new_i: Int = brace_result["pos"] let i = new_i + let clean_start = new_i // Re-lex the expression and inline the tokens. // Wrap in ( ) so that operators inside ${} (e.g. // age + 1) are parsed as a grouped sub-expression // rather than merging with the surrounding concat // Plus tokens at the wrong precedence level. - let inner_toks: [Map] = lex(expr_src) + let inner_toks: [Any] = lex(expr_src) let inner_len: Int = native_list_len(inner_toks) if need_plus { - let out_tokens = native_list_append(out_tokens, make_tok("Plus", "+")) + let out_tokens = tok_append(out_tokens, "Plus", "+") } // Empty interpolation ${} => empty string segment - if inner_len <= 1 { - let out_tokens = native_list_append(out_tokens, make_tok("Str", "")) + // inner_len <= 2 = only the Eof pair (kind="Eof", value="") + if inner_len <= 2 { + let out_tokens = tok_append(out_tokens, "Str", "") } else { - let out_tokens = native_list_append(out_tokens, make_tok("LParen", "(")) + let out_tokens = tok_append(out_tokens, "LParen", "(") let out_tokens = interp_tokens_append_all(out_tokens, inner_toks) - let out_tokens = native_list_append(out_tokens, make_tok("RParen", ")")) + let out_tokens = tok_append(out_tokens, "RParen", ")") } let need_plus = true } else { - // Plain '$' not followed by '{' - treat as literal - let cur_part = native_list_append(cur_part, "$") + // Plain '$' not followed by '{' - treat as literal, continue clean run let i = i + 1 } } else { - let cur_part = native_list_append(cur_part, ch) + // Plain char — extends clean run, no append needed let i = i + 1 } } @@ -666,8 +718,11 @@ fn scan_interp_string(chars: [String], start: Int, total: Int) -> Map 0 { @@ -676,9 +731,9 @@ fn scan_interp_string(chars: [String], start: Int, total: Int) -> Map Map' = 62, '&' = 38, '|' = 124 +// '-' = 45, ':' = 58, '+' = 43, '*' = 42, '%' = 37 +// '(' = 40, ')' = 41, '{' = 123, '}' = 125, '[' = 91, ']' = 93 +// ',' = 44, '.' = 46, ';' = 59, '@' = 64, '?' = 63 -fn lex(source: String) -> [Map] { - let chars: [String] = native_string_chars(source) - let total: Int = native_list_len(chars) - let tokens: [Map] = native_list_empty() +fn lex(source: String) -> [Any] { + // Use str_char_code (returns Int) instead of str_char_at (returns strdup String) + // for all character classification in the hot loop. For a 400KB source, + // str_char_at allocates ~400K × 16B = ~6.4MB of temporary strings. + let total: Int = str_len(source) + let tokens: [Any] = native_list_empty() let i: Int = 0 while i < total { - let ch: String = native_list_get(chars, i) + let c: Int = str_char_code(source, i) - // Skip whitespace - if lex_is_whitespace(ch) { + // Skip whitespace (space=32, tab=9, newline=10, CR=13) + if is_ws_code(c) { let i = i + 1 } else { - // Line comments: // - if ch == "/" { + // Line comments: // (slash=47) + if c == 47 { let next_i = i + 1 if next_i < total { - let next_ch: String = native_list_get(chars, next_i) - if next_ch == "/" { - // skip to end of line + let nc: Int = str_char_code(source, next_i) + if nc == 47 { + // skip to end of line (newline=10) let i = i + 2 let running2 = true while running2 { if i >= total { let running2 = false } else { - let lch: String = native_list_get(chars, i) - if lch == "\n" { + let lc: Int = str_char_code(source, i) + if lc == 10 { let running2 = false } else { let i = i + 1 @@ -729,232 +793,254 @@ fn lex(source: String) -> [Map] { } } } else { - let tokens = native_list_append(tokens, make_tok("Slash", "/")) + let tokens = tok_append(tokens, "Slash", "/") let i = i + 1 } } else { - let tokens = native_list_append(tokens, make_tok("Slash", "/")) + let tokens = tok_append(tokens, "Slash", "/") let i = i + 1 } } else { - // String literal (plain or interpolated with ${expr} syntax). - // scan_interp_string handles both cases: plain strings emit a - // single Str token; interpolated strings emit a flat token - // sequence (Str Plus expr-tokens Plus Str ...) that the parser - // naturally assembles into a BinOp concat tree. - if ch == "\"" { - let interp_result = scan_interp_string(chars, i + 1, total) - let interp_toks: [Map] = interp_result["tokens"] + // String literal: '"' = 34 + if c == 34 { + let interp_result = scan_interp_string(source, i + 1, total) + let interp_toks: [Any] = interp_result["tokens"] let new_pos: Int = interp_result["pos"] let tokens = interp_tokens_append_all(tokens, interp_toks) let i = new_pos } else { - // Number literal - if lex_is_digit(ch) { - let result = scan_digits(chars, i, total) + // Number literal: '0'-'9' = 48-57 + if is_digit_code(c) { + let result = scan_digits(source, i, total) let num_text: String = result["text"] let new_pos: Int = result["pos"] - // check for float (dot followed by digit) + // check for float (dot=46 followed by digit) if new_pos < total { - let dot_ch: String = native_list_get(chars, new_pos) - if dot_ch == "." { + let dc: Int = str_char_code(source, new_pos) + if dc == 46 { let after_dot = new_pos + 1 if after_dot < total { - let after_dot_ch: String = native_list_get(chars, after_dot) - if lex_is_digit(after_dot_ch) { - let frac_result = scan_digits(chars, after_dot, total) + let adc: Int = str_char_code(source, after_dot) + if is_digit_code(adc) { + let frac_result = scan_digits(source, after_dot, total) let frac_text: String = frac_result["text"] let frac_pos: Int = frac_result["pos"] - let tokens = native_list_append(tokens, make_tok("Float", num_text + "." + frac_text)) + let tokens = tok_append(tokens, "Float", num_text + "." + frac_text) let i = frac_pos } else { - let tokens = native_list_append(tokens, make_tok("Int", num_text)) + let tokens = tok_append(tokens, "Int", num_text) let i = new_pos } } else { - let tokens = native_list_append(tokens, make_tok("Int", num_text)) + let tokens = tok_append(tokens, "Int", num_text) let i = new_pos } } else { - let tokens = native_list_append(tokens, make_tok("Int", num_text)) + let tokens = tok_append(tokens, "Int", num_text) let i = new_pos } } else { - let tokens = native_list_append(tokens, make_tok("Int", num_text)) + let tokens = tok_append(tokens, "Int", num_text) let i = new_pos } } else { - // Identifier or keyword - if lex_is_alpha(ch) || ch == "_" { - let result = scan_ident(chars, i, total) + // Identifier or keyword: alpha or '_'=95 + if is_alpha_code(c) || c == 95 { + let result = scan_ident(source, i, total) let word: String = result["text"] let new_pos: Int = result["pos"] let kw = keyword_kind(word) if kw == "" { - let tokens = native_list_append(tokens, make_tok("Ident", word)) + let tokens = tok_append(tokens, "Ident", word) } else { - let tokens = native_list_append(tokens, make_tok(kw, word)) + let tokens = tok_append(tokens, kw, word) } let i = new_pos } else { // Multi-char and single-char operators/delimiters let peek_i = i + 1 - let peek_ch = "" + let peek_c: Int = -1 if peek_i < total { - let peek_ch: String = native_list_get(chars, peek_i) + let peek_c: Int = str_char_code(source, peek_i) } - if ch == "=" { - if peek_ch == "=" { - let tokens = native_list_append(tokens, make_tok("EqEq", "==")) + if c == 61 { + // '=' = 61 + if peek_c == 61 { + let tokens = tok_append(tokens, "EqEq", "==") let i = i + 2 } else { - if peek_ch == ">" { - let tokens = native_list_append(tokens, make_tok("FatArrow", "=>")) + if peek_c == 62 { + // '>' = 62 + let tokens = tok_append(tokens, "FatArrow", "=>") let i = i + 2 } else { - let tokens = native_list_append(tokens, make_tok("Eq", "=")) + let tokens = tok_append(tokens, "Eq", "=") let i = i + 1 } } } else { - if ch == "!" { - if peek_ch == "=" { - let tokens = native_list_append(tokens, make_tok("NotEq", "!=")) + if c == 33 { + // '!' = 33 + if peek_c == 61 { + let tokens = tok_append(tokens, "NotEq", "!=") let i = i + 2 } else { - let tokens = native_list_append(tokens, make_tok("Not", "!")) + let tokens = tok_append(tokens, "Not", "!") let i = i + 1 } } else { - if ch == "<" { - if peek_ch == "=" { - let tokens = native_list_append(tokens, make_tok("LtEq", "<=")) + if c == 60 { + // '<' = 60 + if peek_c == 61 { + let tokens = tok_append(tokens, "LtEq", "<=") let i = i + 2 } else { - let tokens = native_list_append(tokens, make_tok("Lt", "<")) + let tokens = tok_append(tokens, "Lt", "<") let i = i + 1 } } else { - if ch == ">" { - if peek_ch == "=" { - let tokens = native_list_append(tokens, make_tok("GtEq", ">=")) + if c == 62 { + // '>' = 62 + if peek_c == 61 { + let tokens = tok_append(tokens, "GtEq", ">=") let i = i + 2 } else { - let tokens = native_list_append(tokens, make_tok("Gt", ">")) + let tokens = tok_append(tokens, "Gt", ">") let i = i + 1 } } else { - if ch == "&" { - if peek_ch == "&" { - let tokens = native_list_append(tokens, make_tok("And", "&&")) + if c == 38 { + // '&' = 38 + if peek_c == 38 { + let tokens = tok_append(tokens, "And", "&&") let i = i + 2 } else { let i = i + 1 } } else { - if ch == "|" { - if peek_ch == "|" { - let tokens = native_list_append(tokens, make_tok("Or", "||")) + if c == 124 { + // '|' = 124 + if peek_c == 124 { + let tokens = tok_append(tokens, "Or", "||") let i = i + 2 } else { - if peek_ch == ">" { - let tokens = native_list_append(tokens, make_tok("PipeOp", "|>")) + if peek_c == 62 { + // '>' = 62 + let tokens = tok_append(tokens, "PipeOp", "|>") let i = i + 2 } else { - let tokens = native_list_append(tokens, make_tok("Pipe", "|")) + let tokens = tok_append(tokens, "Pipe", "|") let i = i + 1 } } } else { - if ch == "-" { - if peek_ch == ">" { - let tokens = native_list_append(tokens, make_tok("Arrow", "->")) + if c == 45 { + // '-' = 45 + if peek_c == 62 { + // '>' = 62 + let tokens = tok_append(tokens, "Arrow", "->") let i = i + 2 } else { - let tokens = native_list_append(tokens, make_tok("Minus", "-")) + let tokens = tok_append(tokens, "Minus", "-") let i = i + 1 } } else { - if ch == ":" { - if peek_ch == ":" { - let tokens = native_list_append(tokens, make_tok("ColonColon", "::")) + if c == 58 { + // ':' = 58 + if peek_c == 58 { + let tokens = tok_append(tokens, "ColonColon", "::") let i = i + 2 } else { - let tokens = native_list_append(tokens, make_tok("Colon", ":")) + let tokens = tok_append(tokens, "Colon", ":") let i = i + 1 } } else { - if ch == "+" { - let tokens = native_list_append(tokens, make_tok("Plus", "+")) + if c == 43 { + // '+' = 43 + let tokens = tok_append(tokens, "Plus", "+") let i = i + 1 } else { - if ch == "*" { - let tokens = native_list_append(tokens, make_tok("Star", "*")) + if c == 42 { + // '*' = 42 + let tokens = tok_append(tokens, "Star", "*") let i = i + 1 } else { - if ch == "%" { - let tokens = native_list_append(tokens, make_tok("Percent", "%")) + if c == 37 { + // '%' = 37 + let tokens = tok_append(tokens, "Percent", "%") let i = i + 1 } else { - if ch == "(" { - let tokens = native_list_append(tokens, make_tok("LParen", "(")) + if c == 40 { + // '(' = 40 + let tokens = tok_append(tokens, "LParen", "(") let i = i + 1 } else { - if ch == ")" { - let tokens = native_list_append(tokens, make_tok("RParen", ")")) + if c == 41 { + // ')' = 41 + let tokens = tok_append(tokens, "RParen", ")") let i = i + 1 } else { - if ch == "{" { - let tokens = native_list_append(tokens, make_tok("LBrace", "{")) + if c == 123 { + // '{' = 123 + let tokens = tok_append(tokens, "LBrace", "{") let i = i + 1 } else { - if ch == "}" { - let tokens = native_list_append(tokens, make_tok("RBrace", "}")) + if c == 125 { + // '}' = 125 + let tokens = tok_append(tokens, "RBrace", "}") let i = i + 1 } else { - if ch == "[" { - let tokens = native_list_append(tokens, make_tok("LBracket", "[")) + if c == 91 { + // '[' = 91 + let tokens = tok_append(tokens, "LBracket", "[") let i = i + 1 } else { - if ch == "]" { - let tokens = native_list_append(tokens, make_tok("RBracket", "]")) + if c == 93 { + // ']' = 93 + let tokens = tok_append(tokens, "RBracket", "]") let i = i + 1 } else { - if ch == "," { - let tokens = native_list_append(tokens, make_tok("Comma", ",")) + if c == 44 { + // ',' = 44 + let tokens = tok_append(tokens, "Comma", ",") let i = i + 1 } else { - if ch == "." { - // Check for ..= (inclusive range) before .. (exclusive range) before single . + if c == 46 { + // '.' = 46: check for ..= or .. let peek2_i = i + 2 - let peek2_ch = "" + let peek2_c: Int = -1 if peek2_i < total { - let peek2_ch: String = native_list_get(chars, peek2_i) + let peek2_c: Int = str_char_code(source, peek2_i) } - if peek_ch == "." { - if peek2_ch == "=" { - let tokens = native_list_append(tokens, make_tok("DotDotEq", "..=")) + if peek_c == 46 { + // '..' prefix + if peek2_c == 61 { + // '..=' = 46 46 61 + let tokens = tok_append(tokens, "DotDotEq", "..=") let i = i + 3 } else { - let tokens = native_list_append(tokens, make_tok("DotDot", "..")) + let tokens = tok_append(tokens, "DotDot", "..") let i = i + 2 } } else { - let tokens = native_list_append(tokens, make_tok("Dot", ".")) + let tokens = tok_append(tokens, "Dot", ".") let i = i + 1 } } else { - if ch == ";" { - let tokens = native_list_append(tokens, make_tok("Semicolon", ";")) + if c == 59 { + // ';' = 59 + let tokens = tok_append(tokens, "Semicolon", ";") let i = i + 1 } else { - if ch == "@" { - let tokens = native_list_append(tokens, make_tok("At", "@")) + if c == 64 { + // '@' = 64 + let tokens = tok_append(tokens, "At", "@") let i = i + 1 } else { - if ch == "?" { - let tokens = native_list_append(tokens, make_tok("QuestionMark", "?")) + if c == 63 { + // '?' = 63 + let tokens = tok_append(tokens, "QuestionMark", "?") let i = i + 1 } else { // unknown char - skip @@ -988,6 +1074,6 @@ fn lex(source: String) -> [Map] { } } - let tokens = native_list_append(tokens, make_tok("Eof", "")) + let tokens = tok_append(tokens, "Eof", "") tokens } diff --git a/lang/el-compiler/src/parser.el b/lang/el-compiler/src/parser.el index 62fda8c..f4656f8 100644 --- a/lang/el-compiler/src/parser.el +++ b/lang/el-compiler/src/parser.el @@ -9,25 +9,28 @@ // The token list is passed as a parameter to all parse functions. // native_list_get is used to index into it without cloning. // -// Entry point: fn parse(tokens: [Map]) -> [Map] +// Entry point: fn parse(tokens: [Any]) -> [Map] // -- Token access helpers ------------------------------------------------------ +// Tokens is a flat [Any] list: tokens[2*i] = kind, tokens[2*i+1] = value. +// This avoids one ElMap allocation per token (~112B each), saving ~4MB on large +// programs. All callers use these helpers -- only these three need updating. -fn tok_at(tokens: [Map], pos: Int) -> Map { - native_list_get(tokens, pos) +fn tok_at(tokens: [Any], pos: Int) -> Map { + let kind: String = native_list_get(tokens, pos * 2) + let value: String = native_list_get(tokens, pos * 2 + 1) + { "kind": kind, "value": value } } -fn tok_kind(tokens: [Map], pos: Int) -> String { - let t = native_list_get(tokens, pos) - t["kind"] +fn tok_kind(tokens: [Any], pos: Int) -> String { + native_list_get(tokens, pos * 2) } -fn tok_value(tokens: [Map], pos: Int) -> String { - let t = native_list_get(tokens, pos) - t["value"] +fn tok_value(tokens: [Any], pos: Int) -> String { + native_list_get(tokens, pos * 2 + 1) } -fn expect(tokens: [Map], pos: Int, kind: String) -> Int { +fn expect(tokens: [Any], pos: Int, kind: String) -> Int { let k = tok_kind(tokens, pos) if k == kind { return pos + 1 @@ -46,7 +49,7 @@ fn make_result(node: Map, pos: Int) -> Map { // Skips over a type annotation, returning the new position. // Types can be: Ident, [Type], Map, Type?, Type -fn skip_type(tokens: [Map], pos: Int) -> Int { +fn skip_type(tokens: [Any], pos: Int) -> Int { let k = tok_kind(tokens, pos) // Array type: [Type] if k == "LBracket" { @@ -103,7 +106,7 @@ fn skip_type(tokens: [Map], pos: Int) -> Int { // -- Parameter list ------------------------------------------------------------ // Parses (name: Type, name: Type, ...) - returns { "params": [...], "pos": ... } -fn parse_params(tokens: [Map], pos: Int) -> Map { +fn parse_params(tokens: [Any], pos: Int) -> Map { let p = expect(tokens, pos, "LParen") let params: [Map] = native_list_empty() let running = true @@ -292,7 +295,7 @@ fn is_void_element(name: String) -> Bool { // Collect tokens as text content until we hit Lt, LBrace, Eof, or a // closing-tag marker (Lt Slash). Returns { "text": "...", "pos": p } -fn parse_html_text_tokens(tokens: [Map], pos: Int) -> Map { +fn parse_html_text_tokens(tokens: [Any], pos: Int) -> Map { let parts: [String] = native_list_empty() let p = pos let running = true @@ -322,7 +325,7 @@ fn parse_html_text_tokens(tokens: [Map], pos: Int) -> Map). -fn parse_html_attrs(tokens: [Map], pos: Int) -> Map { +fn parse_html_attrs(tokens: [Any], pos: Int) -> Map { let attrs: [Map] = native_list_empty() let p = pos let running = true @@ -374,7 +377,7 @@ fn parse_html_attrs(tokens: [Map], pos: Int) -> Map { // Parse the children of an HTML element until we see the closing tag // or EOF. Returns { "children": [...], "pos": p_after_closing_tag } -fn parse_html_children(tokens: [Map], pos: Int, parent_tag: String) -> Map { +fn parse_html_children(tokens: [Any], pos: Int, parent_tag: String) -> Map { let children: [Map] = native_list_empty() let p = pos let running = true @@ -514,14 +517,14 @@ fn parse_html_children(tokens: [Map], pos: Int, parent_tag: String) // Parse body of {#each} until {/each}. Mirrors parse_html_children but // stops at the {/each} sentinel rather than a closing element tag. -fn parse_html_each_body(tokens: [Map], pos: Int) -> Map { +fn parse_html_each_body(tokens: [Any], pos: Int) -> Map { parse_html_children(tokens, pos, "__each__") } // Parse a single HTML element: children // or self-closing: // Pos points to the Lt token. -fn parse_html_element(tokens: [Map], pos: Int) -> Map { +fn parse_html_element(tokens: [Any], pos: Int) -> Map { let p = pos // consume < let p = expect(tokens, p, "Lt") @@ -558,7 +561,7 @@ fn parse_html_element(tokens: [Map], pos: Int) -> Map // Entry point for HTML template parsing. // Pos points to Lt (or Lt Not for ). // May parse an optional prefix followed by the root element. -fn parse_html_template(tokens: [Map], pos: Int) -> Map { +fn parse_html_template(tokens: [Any], pos: Int) -> Map { let p = pos // Check for let doctype = false @@ -596,7 +599,7 @@ fn parse_html_template(tokens: [Map], pos: Int) -> Map make_result({ "expr": "HtmlTemplate", "root": root_with_doctype }, p) } -fn parse_primary(tokens: [Map], pos: Int) -> Map { +fn parse_primary(tokens: [Any], pos: Int) -> Map { let k = tok_kind(tokens, pos) let v = tok_value(tokens, pos) @@ -819,7 +822,7 @@ fn parse_primary(tokens: [Map], pos: Int) -> Map { make_result({ "expr": "Nil" }, pos + 1) } -fn parse_if(tokens: [Map], pos: Int) -> Map { +fn parse_if(tokens: [Any], pos: Int) -> Map { let p = expect(tokens, pos, "If") // Suppress Map-literal parsing in the cond so a stray `{` (the start // of the then-block) isn't gobbled as a Map. @@ -855,7 +858,7 @@ fn parse_if(tokens: [Map], pos: Int) -> Map { make_result({ "expr": "If", "cond": cond, "then": then_stmts, "else": else_stmts, "has_else": has_else }, p) } -fn parse_match(tokens: [Map], pos: Int) -> Map { +fn parse_match(tokens: [Any], pos: Int) -> Map { let p = expect(tokens, pos, "Match") let prev_no_block: String = state_get("__no_block_expr") state_set("__no_block_expr", "1") @@ -895,7 +898,7 @@ fn parse_match(tokens: [Map], pos: Int) -> Map { make_result({ "expr": "Match", "subject": subject, "arms": arms }, p) } -fn parse_pattern(tokens: [Map], pos: Int) -> Map { +fn parse_pattern(tokens: [Any], pos: Int) -> Map { let k = tok_kind(tokens, pos) if k == "Ident" { let v = tok_value(tokens, pos) @@ -924,7 +927,7 @@ fn parse_pattern(tokens: [Map], pos: Int) -> Map { make_result({ "pattern": "Wildcard" }, pos + 1) } -fn parse_for_expr(tokens: [Map], pos: Int) -> Map { +fn parse_for_expr(tokens: [Any], pos: Int) -> Map { let p = expect(tokens, pos, "For") let item_name = tok_value(tokens, p) let p = p + 1 @@ -941,7 +944,7 @@ fn parse_for_expr(tokens: [Map], pos: Int) -> Map { make_result({ "expr": "For", "item": item_name, "list": list_expr, "body": body }, p) } -fn parse_block(tokens: [Map], pos: Int) -> Map { +fn parse_block(tokens: [Any], pos: Int) -> Map { let p = expect(tokens, pos, "LBrace") let stmts: [Map] = native_list_empty() let running = true @@ -998,7 +1001,7 @@ fn is_duration_unit(name: String) -> Bool { false } -fn parse_postfix(tokens: [Map], pos: Int) -> Map { +fn parse_postfix(tokens: [Any], pos: Int) -> Map { let r = parse_primary(tokens, pos) let node = r["node"] let p = r["pos"] @@ -1115,7 +1118,7 @@ fn is_binop(kind: String) -> Bool { false } -fn parse_binop(tokens: [Map], pos: Int, min_prec: Int) -> Map { +fn parse_binop(tokens: [Any], pos: Int, min_prec: Int) -> Map { let r = parse_postfix(tokens, pos) let left = r["node"] let p = r["pos"] @@ -1140,13 +1143,13 @@ fn parse_binop(tokens: [Map], pos: Int, min_prec: Int) -> Map], pos: Int) -> Map { +fn parse_expr(tokens: [Any], pos: Int) -> Map { parse_binop(tokens, pos, 1) } // -- Statement parsing --------------------------------------------------------- -fn parse_stmt(tokens: [Map], pos: Int) -> Map { +fn parse_stmt(tokens: [Any], pos: Int) -> Map { let k = tok_kind(tokens, pos) // let binding @@ -1619,8 +1622,9 @@ fn parse_stmt(tokens: [Map], pos: Int) -> Map { // -- Top-level parse ------------------------------------------------------------ -fn parse(tokens: [Map]) -> [Map] { - let total: Int = native_list_len(tokens) +fn parse(tokens: [Any]) -> [Map] { + // Flat list: 2 entries per token, so divide by 2 for token count. + let total: Int = native_list_len(tokens) / 2 let stmts: [Map] = native_list_empty() let pos: Int = 0 let running = true