merge compiler/string-interp — string interpolation via lexer desugaring

2026-05-03 15:52:21 -05:00
parent 641227a7d3 ce9a2caff4
commit 2e778ca664
1 changed files with 241 additions and 13 deletions
@@ -460,6 +460,238 @@ fn scan_string(chars: [String], start: Int, total: Int) -> Map<String, Any> {
    { "text": str_join(parts, ""), "pos": i }
 }

+// -- String interpolation ------------------------------------------------------
+//
+// scan_interp_brace - collect the source text of one `${...}` body. `start` is the
+// index just after `${`; only `{` and `}` change the depth, all other chars are copied
+// verbatim. Returns { "text": inner_source, "pos": index after the matching `}` }.
+fn scan_interp_brace(chars: [String], start: Int, total: Int) -> Map<String, Any> {
+    let i = start
+    let parts: [String] = native_list_empty()
+    let depth = 1  // the `${` that precedes `start` counts as the first open brace
+    let running = true
+    while running {  // NOTE(review): the nested `let`s below appear to rebind these outer variables - confirm
+        if i >= total {
+            let running = false  // input ended with no matching `}`: "pos" is left at i
+        } else {
+            let ch: String = native_list_get(chars, i)
+            if ch == "{" {
+                let depth = depth + 1  // nested open brace: counted and kept in the text
+                let parts = native_list_append(parts, ch)
+                let i = i + 1
+            } else {
+                if ch == "}" {
+                    let depth = depth - 1
+                    if depth <= 0 {
+                        // Depth is back to zero: this is the matching close brace. Step past it and stop; it is not added to the text.
+                        let i = i + 1
+                        let running = false
+                    } else {
+                        let parts = native_list_append(parts, ch)  // inner close brace: kept in the text
+                        let i = i + 1
+                    }
+                } else {
+                    let parts = native_list_append(parts, ch)
+                    let i = i + 1
+                }
+            }
+        }
+    }
+    { "text": str_join(parts, ""), "pos": i }
+}
+
+// interp_tokens_append_all - append the tokens of src to dst in order, stopping at the
+// first token whose "kind" is "Eof" (not copied; presumably lex()'s end sentinel). Returns the resulting list.
+fn interp_tokens_append_all(dst: [Map<String, Any>], src: [Map<String, Any>]) -> [Map<String, Any>] {
+    let src_len: Int = native_list_len(src)
+    let j = 0
+    let result = dst  // accumulator, seeded with the caller's list
+    while j < src_len {
+        let tok: Map<String, Any> = native_list_get(src, j)
+        let tk: String = tok["kind"]
+        if tk == "Eof" {
+            let j = src_len  // jump the index to the end so the loop exits; tokens after Eof are not copied
+        } else {
+            let result = native_list_append(result, tok)
+            let j = j + 1
+        }
+    }
+    result
+}
+
+// scan_interp_string - scan a string literal that may contain ${expr}
+// interpolations. `start` is the index just AFTER the opening `"`.
+// Returns { "tokens": [token list to inject], "pos": index after the closing `"` }.
+// If the input ends before a closing `"`, scanning simply stops there (no error).
+// A string with no ${} yields exactly one Str token (possibly empty). With
+// interpolation, literal segments and expressions are joined by Plus tokens and
+// each non-empty expression is re-lexed and wrapped in LParen/RParen, e.g.:
+//
+//   "hello ${name}!"
+//   => Str("hello ") Plus LParen <tokens for name> RParen Plus Str("!")
+//
+// Empty literal segments are omitted; an empty `${}` contributes Str(""). Each
+// literal segment goes through strip_code_comments when looks_like_code accepts
+// it. The flat stream is intended for the parser's existing parse_binop / parse_primary path.
+//
+// Escapes: \" \n \t \r \\ \$ (literal dollar sign); any other \x yields x itself.
+// Nested quotes inside ${} are not supported; use a variable instead.
+fn scan_interp_string(chars: [String], start: Int, total: Int) -> Map<String, Any> {
+    let i = start
+    let out_tokens: [Map<String, Any>] = native_list_empty()
+    let cur_part: [String] = native_list_empty()
+    let has_interp = false  // becomes true once any ${...} has been seen
+    let need_plus = false  // true once a token has been emitted, so the next piece is preceded by Plus
+    let running = true
+
+    while running {
+        if i >= total {
+            let running = false  // ran off the end of the input: unterminated string
+        } else {
+            let ch: String = native_list_get(chars, i)
+
+            if ch == "\\" {
+                // Escape sequence: decided by the char that follows the backslash
+                let next_i = i + 1
+                if next_i < total {
+                    let next_ch: String = native_list_get(chars, next_i)
+                    if next_ch == "$" {
+                        // \$ => literal '$' (escape for interpolation syntax)
+                        let cur_part = native_list_append(cur_part, "$")
+                        let i = next_i + 1
+                    } else {
+                        if next_ch == "\"" {
+                            let cur_part = native_list_append(cur_part, "\"")
+                            let i = next_i + 1
+                        } else {
+                            if next_ch == "n" {
+                                let cur_part = native_list_append(cur_part, "\n")
+                                let i = next_i + 1
+                            } else {
+                                if next_ch == "t" {
+                                    let cur_part = native_list_append(cur_part, "\t")
+                                    let i = next_i + 1
+                                } else {
+                                    if next_ch == "r" {
+                                        let cur_part = native_list_append(cur_part, "\r")
+                                        let i = next_i + 1
+                                    } else {
+                                        if next_ch == "\\" {
+                                            let cur_part = native_list_append(cur_part, "\\")
+                                            let i = next_i + 1
+                                        } else {
+                                            let cur_part = native_list_append(cur_part, next_ch)  // unknown escape: keep the char, drop the backslash
+                                            let i = next_i + 1
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                } else {
+                    let i = i + 1  // backslash is the last char of the input: skipped, nothing appended
+                }
+            } else {
+                if ch == "\"" {
+                    // Unescaped closing quote - step past it and stop scanning
+                    let i = i + 1
+                    let running = false
+                } else {
+                    if ch == "$" {
+                        // Check for ${ (start of interpolation)
+                        let next_i = i + 1
+                        let is_interp = false
+                        if next_i < total {
+                            let next_ch: String = native_list_get(chars, next_i)
+                            if next_ch == "{" {
+                                let is_interp = true
+                            }
+                        }
+                        if is_interp {
+                            // Flush the literal text gathered so far as a Str token (skipped when empty)
+                            let part_len: Int = native_list_len(cur_part)
+                            if part_len > 0 {
+                                let part_text = str_join(cur_part, "")
+                                if need_plus {
+                                    let out_tokens = native_list_append(out_tokens, make_tok("Plus", "+"))
+                                }
+                                let clean_part = part_text
+                                if looks_like_code(part_text) {
+                                    let clean_part = strip_code_comments(part_text)
+                                }
+                                let out_tokens = native_list_append(out_tokens, make_tok("Str", clean_part))
+                                let need_plus = true
+                            }
+                            let cur_part = native_list_empty()  // start a fresh literal segment
+                            let has_interp = true
+
+                            // Extract the brace-balanced expression source; next_i + 1 is the index just past `${`
+                            let brace_result = scan_interp_brace(chars, next_i + 1, total)
+                            let expr_src: String = brace_result["text"]
+                            let new_i: Int = brace_result["pos"]
+                            let i = new_i
+
+                            // Re-lex the expression and inline the tokens.
+                            // Wrap in ( ) so that operators inside ${} (e.g.
+                            // age + 1) are parsed as a grouped sub-expression
+                            // rather than merging with the surrounding concat
+                            // Plus tokens at the wrong precedence level.
+                            let inner_toks: [Map<String, Any>] = lex(expr_src)
+                            let inner_len: Int = native_list_len(inner_toks)
+
+                            if need_plus {
+                                let out_tokens = native_list_append(out_tokens, make_tok("Plus", "+"))
+                            }
+                            // One token or fewer came back (presumably just Eof): treat ${} as an empty Str segment
+                            if inner_len <= 1 {
+                                let out_tokens = native_list_append(out_tokens, make_tok("Str", ""))
+                            } else {
+                                let out_tokens = native_list_append(out_tokens, make_tok("LParen", "("))
+                                let out_tokens = interp_tokens_append_all(out_tokens, inner_toks)
+                                let out_tokens = native_list_append(out_tokens, make_tok("RParen", ")"))
+                            }
+                            let need_plus = true
+                        } else {
+                            // Plain '$' not followed by '{' - treat as literal
+                            let cur_part = native_list_append(cur_part, "$")
+                            let i = i + 1
+                        }
+                    } else {
+                        let cur_part = native_list_append(cur_part, ch)
+                        let i = i + 1
+                    }
+                }
+            }
+        }
+    }
+
+    // Flush remaining literal segment and build final token list
+    let part_text = str_join(cur_part, "")
+    let part_len: Int = native_list_len(cur_part)
+    if has_interp {
+        // Interpolated string: only emit trailing segment if non-empty
+        if part_len > 0 {
+            let clean_part = part_text
+            if looks_like_code(part_text) {
+                let clean_part = strip_code_comments(part_text)
+            }
+            if need_plus {
+                let out_tokens = native_list_append(out_tokens, make_tok("Plus", "+"))
+            }
+            let out_tokens = native_list_append(out_tokens, make_tok("Str", clean_part))
+        }
+    } else {
+        // Plain string with no interpolation - always emits one Str token, even when the text is empty
+        let clean_text = part_text
+        if looks_like_code(part_text) {
+            let clean_text = strip_code_comments(part_text)
+        }
+        let out_tokens = native_list_append(out_tokens, make_tok("Str", clean_text))
+    }
+
+    { "tokens": out_tokens, "pos": i }
+}
+
 // -- Main lexer ----------------------------------------------------------------

 fn lex(source: String) -> [Map<String, Any>] {
@@ -505,20 +737,16 @@ fn lex(source: String) -> [Map<String, Any>] {
                    let i = i + 1
                }
            } else {
-                // String literal
+                // String literal (plain or interpolated with ${expr} syntax).
+                // scan_interp_string handles both cases: plain strings emit a
+                // single Str token; interpolated strings emit a flat token
+                // sequence (Str Plus expr-tokens Plus Str ...) that the parser
+                // naturally assembles into a BinOp concat tree.
                if ch == "\"" {
-                    let result = scan_string(chars, i + 1, total)
-                    let str_text: String = result["text"]
-                    let new_pos: Int = result["pos"]
-                    // Compile-time scrub: strings that embed JS or CSS get
-                    // their // line comments and /* block comments stripped
-                    // before the token reaches the parser. Plain prose passes
-                    // through untouched.
-                    let clean_text = str_text
-                    if looks_like_code(str_text) {
-                        let clean_text = strip_code_comments(str_text)
-                    }
-                    let tokens = native_list_append(tokens, make_tok("Str", clean_text))
+                    let interp_result = scan_interp_string(chars, i + 1, total)
+                    let interp_toks: [Map<String, Any>] = interp_result["tokens"]
+                    let new_pos: Int = interp_result["pos"]
+                    let tokens = interp_tokens_append_all(tokens, interp_toks)
                    let i = new_pos
                } else {
                    // Number literal