From ce9a2caff43c764fe123d1f65bb9c2711014aaca Mon Sep 17 00:00:00 2001
From: Will Anderson
Date: Sun, 3 May 2026 15:50:23 -0500
Subject: [PATCH] add string interpolation to El ("hello ${name}")
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Lexer gains scan_interp_string which replaces scan_string in the main lex
loop.  When no ${ is found it behaves identically to before (single Str
token).  When interpolations are present it emits a flat token sequence —
Str, Plus, (expr tokens), Plus, Str, … — that the existing parse_binop /
cg_expr BinOp-Plus-string path assembles into nested el_str_concat calls
with zero parser or codegen changes.

Key design choices:
- scan_interp_brace tracks { depth so nested braces (e.g. a map literal
  passed to a call) inside ${} are safe
- inner expr tokens are wrapped in ( ) so operators like + in ${n+1} do
  not associate with the surrounding concat Plus tokens
- the emitted sequence always starts with a Str token; a literal that
  begins with ${ gets a leading empty Str so "${a}${b}" can never lex as
  the plain addition (a) + (b)
- \$ escapes to a literal dollar sign; bare $ not before { passes through
- empty ${} emits an empty string segment
---
 el-compiler/src/lexer.el | 256 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 243 insertions(+), 13 deletions(-)

diff --git a/el-compiler/src/lexer.el b/el-compiler/src/lexer.el
index cef3b33..5ac9423 100644
--- a/el-compiler/src/lexer.el
+++ b/el-compiler/src/lexer.el
@@ -460,6 +460,240 @@ fn scan_string(chars: [String], start: Int, total: Int) -> Map {
     { "text": str_join(parts, ""), "pos": i }
 }
 
+// -- String interpolation ------------------------------------------------------
+//
+// scan_interp_brace - scan from `start` (the char after `${`) to the matching
+// `}`, tracking brace depth so inner braces (e.g. fn calls, map literals) are
+// handled correctly.  Returns { "text": inner_source, "pos": i_after_close }.
+// If the input ends before the matching `}` is found, "text" holds everything
+// scanned so far and "pos" is the index where scanning stopped.
+// Braces are counted textually, so a `{` or `}` inside a nested string literal
+// also changes the depth.
+fn scan_interp_brace(chars: [String], start: Int, total: Int) -> Map {
+    let i = start
+    let parts: [String] = native_list_empty()
+    let depth = 1
+    let running = true
+    while running {
+        if i >= total {
+            let running = false
+        } else {
+            let ch: String = native_list_get(chars, i)
+            if ch == "{" {
+                let depth = depth + 1
+                let parts = native_list_append(parts, ch)
+                let i = i + 1
+            } else {
+                if ch == "}" {
+                    let depth = depth - 1
+                    if depth <= 0 {
+                        // Closing brace of the interpolation - stop, do not include it
+                        let i = i + 1
+                        let running = false
+                    } else {
+                        let parts = native_list_append(parts, ch)
+                        let i = i + 1
+                    }
+                } else {
+                    let parts = native_list_append(parts, ch)
+                    let i = i + 1
+                }
+            }
+        }
+    }
+    { "text": str_join(parts, ""), "pos": i }
+}
+
+// interp_tokens_append_all - copy every token from src into dst, skipping the
+// trailing Eof sentinel that lex() always appends.  Returns the updated dst list.
+// Copying stops at the first token whose "kind" is "Eof"; later tokens (if any)
+// are not copied.
+fn interp_tokens_append_all(dst: [Map], src: [Map]) -> [Map] {
+    let src_len: Int = native_list_len(src)
+    let j = 0
+    let result = dst
+    while j < src_len {
+        let tok: Map = native_list_get(src, j)
+        let tk: String = tok["kind"]
+        if tk == "Eof" {
+            let j = src_len
+        } else {
+            let result = native_list_append(result, tok)
+            let j = j + 1
+        }
+    }
+    result
+}
+
+// scan_interp_string - scan a string literal that may contain ${expr}
+// interpolations.  Starts AFTER the opening `"`.
+// Returns { "tokens": [token list to inject], "pos": i_after_close_quote }.
+//
+// For a plain string (no ${}) this emits a single Str token, identical to the
+// old scan_string path.  For an interpolated string it emits a flat sequence
+// of tokens equivalent to the string-concat expression, for example:
+//
+//   "hello ${name}!"
+//     =>  Str("hello ") Plus LParen name-tokens RParen Plus Str("!")
+//
+// The sequence always begins with a Str token: when the literal starts with
+// `${`, an empty Str("") is emitted first so the leftmost operand of the Plus
+// chain is a string and "${a}${b}" cannot collapse into (a) + (b), which the
+// parser would read as ordinary addition.  Empty literal segments elsewhere
+// (between adjacent ${ } blocks, or at the end) are omitted.  The resulting
+// token stream is consumed by the existing parse_binop / parse_primary path
+// in the parser with zero parser changes required.
+//
+// Supported escape sequences: \" \n \t \r \\ \$ (literal dollar sign).
+// Any other escaped character is kept as-is without the backslash.
+// Nested quotes inside ${} are not supported; use a variable instead.
+fn scan_interp_string(chars: [String], start: Int, total: Int) -> Map {
+    let i = start
+    let out_tokens: [Map] = native_list_empty()
+    let cur_part: [String] = native_list_empty()
+    let has_interp = false
+    let need_plus = false
+    let running = true
+
+    while running {
+        if i >= total {
+            let running = false
+        } else {
+            let ch: String = native_list_get(chars, i)
+
+            if ch == "\\" {
+                // Escape sequence
+                let next_i = i + 1
+                if next_i < total {
+                    let next_ch: String = native_list_get(chars, next_i)
+                    // \n \t \r map to control characters; every other escaped
+                    // character (\" \\ \$ and anything unrecognised) stands
+                    // for itself.
+                    let esc_ch: String = next_ch
+                    if next_ch == "n" {
+                        let esc_ch = "\n"
+                    }
+                    if next_ch == "t" {
+                        let esc_ch = "\t"
+                    }
+                    if next_ch == "r" {
+                        let esc_ch = "\r"
+                    }
+                    let cur_part = native_list_append(cur_part, esc_ch)
+                    let i = next_i + 1
+                } else {
+                    let i = i + 1
+                }
+            } else {
+                if ch == "\"" {
+                    // Closing quote - stop scanning
+                    let i = i + 1
+                    let running = false
+                } else {
+                    if ch == "$" {
+                        // Check for ${ (start of interpolation)
+                        let next_i = i + 1
+                        let is_interp = false
+                        if next_i < total {
+                            let next_ch: String = native_list_get(chars, next_i)
+                            if next_ch == "{" {
+                                let is_interp = true
+                            }
+                        }
+                        if is_interp {
+                            // Flush the accumulated literal part.  An empty part
+                            // is skipped unless nothing has been emitted yet
+                            // (need_plus still false): the very first token must
+                            // be a Str so the concat chain starts with a string.
+                            let part_len: Int = native_list_len(cur_part)
+                            let emit_part = true
+                            if need_plus {
+                                if part_len <= 0 {
+                                    let emit_part = false
+                                }
+                            }
+                            if emit_part {
+                                let part_text = str_join(cur_part, "")
+                                if need_plus {
+                                    let out_tokens = native_list_append(out_tokens, make_tok("Plus", "+"))
+                                }
+                                let clean_part = part_text
+                                if looks_like_code(part_text) {
+                                    let clean_part = strip_code_comments(part_text)
+                                }
+                                let out_tokens = native_list_append(out_tokens, make_tok("Str", clean_part))
+                                let need_plus = true
+                            }
+                            let cur_part = native_list_empty()
+                            let has_interp = true
+
+                            // Scan brace-balanced expression source
+                            let brace_result = scan_interp_brace(chars, next_i + 1, total)
+                            let expr_src: String = brace_result["text"]
+                            let new_i: Int = brace_result["pos"]
+                            let i = new_i
+
+                            // Re-lex the expression and inline the tokens.
+                            // Wrap in ( ) so that operators inside ${} (e.g.
+                            // age + 1) are parsed as a grouped sub-expression
+                            // rather than merging with the surrounding concat
+                            // Plus tokens at the wrong precedence level.
+                            let inner_toks: [Map] = lex(expr_src)
+                            let inner_len: Int = native_list_len(inner_toks)
+
+                            // A Str token or an earlier interpolation always
+                            // precedes this point, so the joining Plus is unconditional.
+                            let out_tokens = native_list_append(out_tokens, make_tok("Plus", "+"))
+                            // Empty interpolation ${} => empty string segment
+                            if inner_len <= 1 {
+                                let out_tokens = native_list_append(out_tokens, make_tok("Str", ""))
+                            } else {
+                                let out_tokens = native_list_append(out_tokens, make_tok("LParen", "("))
+                                let out_tokens = interp_tokens_append_all(out_tokens, inner_toks)
+                                let out_tokens = native_list_append(out_tokens, make_tok("RParen", ")"))
+                            }
+                            let need_plus = true
+                        } else {
+                            // Plain '$' not followed by '{' - treat as literal
+                            let cur_part = native_list_append(cur_part, "$")
+                            let i = i + 1
+                        }
+                    } else {
+                        let cur_part = native_list_append(cur_part, ch)
+                        let i = i + 1
+                    }
+                }
+            }
+        }
+    }
+
+    // Flush remaining literal segment and build final token list
+    let part_text = str_join(cur_part, "")
+    let part_len: Int = native_list_len(cur_part)
+    if has_interp {
+        // Interpolated string: only emit trailing segment if non-empty
+        if part_len > 0 {
+            let clean_part = part_text
+            if looks_like_code(part_text) {
+                let clean_part = strip_code_comments(part_text)
+            }
+            if need_plus {
+                let out_tokens = native_list_append(out_tokens, make_tok("Plus", "+"))
+            }
+            let out_tokens = native_list_append(out_tokens, make_tok("Str", clean_part))
+        }
+    } else {
+        // Plain string with no interpolation - same behaviour as old scan_string
+        let clean_text = part_text
+        if looks_like_code(part_text) {
+            let clean_text = strip_code_comments(part_text)
+        }
+        let out_tokens = native_list_append(out_tokens, make_tok("Str", clean_text))
+    }
+
+    { "tokens": out_tokens, "pos": i }
+}
+
 // -- Main lexer ----------------------------------------------------------------
 
 fn lex(source: String) -> [Map] {
@@ -505,20 +739,16 @@ fn lex(source: String) -> [Map] {
                         let i = i + 1
                     }
                 } else {
-                    // String literal
+                    // String literal (plain or interpolated with ${expr} syntax).
+                    // scan_interp_string handles both cases: plain strings emit a
+                    // single Str token; interpolated strings emit a flat token
+                    // sequence (Str Plus expr-tokens Plus Str ...) that the parser
+                    // naturally assembles into a BinOp concat tree.
                     if ch == "\"" {
-                        let result = scan_string(chars, i + 1, total)
-                        let str_text: String = result["text"]
-                        let new_pos: Int = result["pos"]
-                        // Compile-time scrub: strings that embed JS or CSS get
-                        // their // line comments and /* block comments stripped
-                        // before the token reaches the parser. Plain prose passes
-                        // through untouched.
-                        let clean_text = str_text
-                        if looks_like_code(str_text) {
-                            let clean_text = strip_code_comments(str_text)
-                        }
-                        let tokens = native_list_append(tokens, make_tok("Str", clean_text))
+                        let interp_result = scan_interp_string(chars, i + 1, total)
+                        let interp_toks: [Map] = interp_result["tokens"]
+                        let new_pos: Int = interp_result["pos"]
+                        let tokens = interp_tokens_append_all(tokens, interp_toks)
                         let i = new_pos
                     } else {
                         // Number literal