merge compiler/string-interp — string interpolation via lexer desugaring
This commit is contained in:
+241
-13
@@ -460,6 +460,238 @@ fn scan_string(chars: [String], start: Int, total: Int) -> Map<String, Any> {
|
||||
{ "text": str_join(parts, ""), "pos": i }
|
||||
}
|
||||
|
||||
// -- String interpolation ------------------------------------------------------
|
||||
//
|
||||
// scan_interp_brace - collect the source text of one `${...}` expression.
//
// `start` is the index of the first char after the opening `${`; the scan
// never reads at or beyond index `total`. A brace depth starting at 1 is kept:
// each `{` raises it, each `}` lowers it, and the `}` that brings it to zero
// ends the scan. That final `}` is consumed but not copied; every other char,
// inner braces included, is copied verbatim. Quote characters get no special
// treatment, so a brace inside a nested string literal is counted like any
// other brace.
//
// Returns { "text": inner_source, "pos": index_after_closing_brace }. When the
// input runs out before the matching `}`, no error is raised: "text" holds
// whatever was scanned and "pos" is the index at which scanning stopped.
fn scan_interp_brace(chars: [String], start: Int, total: Int) -> Map<String, Any> {
    let cursor = start
    let collected: [String] = native_list_empty()
    let open_braces = 1
    let scanning = true
    while scanning {
        if cursor < total {
            let c: String = native_list_get(chars, cursor)
            // Adjust the depth first; only a `}` can bring it down to zero.
            if c == "{" {
                let open_braces = open_braces + 1
            }
            if c == "}" {
                let open_braces = open_braces - 1
            }
            // Every inspected char is consumed, the closing brace included.
            let cursor = cursor + 1
            if open_braces > 0 {
                let collected = native_list_append(collected, c)
            } else {
                // Matching close brace: stop without copying it.
                let scanning = false
            }
        } else {
            // Input exhausted before the matching close brace.
            let scanning = false
        }
    }
    { "text": str_join(collected, ""), "pos": cursor }
}
|
||||
|
||||
// interp_tokens_append_all - append the tokens of `src` onto `dst`, in order.
//
// Copying stops at the first token whose "kind" is "Eof" (that token is not
// copied) or at the end of `src`, whichever comes first. The function is meant
// for token lists produced by lex(), which is expected to end with a single
// Eof sentinel - confirm against lex(). Returns the list produced by the appends.
fn interp_tokens_append_all(dst: [Map<String, Any>], src: [Map<String, Any>]) -> [Map<String, Any>] {
    let count: Int = native_list_len(src)
    let merged = dst
    let idx = 0
    let copying = true
    while copying {
        if idx >= count {
            let copying = false
        } else {
            let entry: Map<String, Any> = native_list_get(src, idx)
            let kind: String = entry["kind"]
            if kind == "Eof" {
                // Sentinel reached: drop it and everything after it.
                let copying = false
            } else {
                let merged = native_list_append(merged, entry)
                let idx = idx + 1
            }
        }
    }
    merged
}
|
||||
|
||||
// interp_literal_tok - build the Str token for one literal piece of a string.
// Text accepted by looks_like_code() is passed through strip_code_comments()
// first; any other text is used unchanged.
fn interp_literal_tok(text: String) -> Map<String, Any> {
    let cleaned = text
    if looks_like_code(text) {
        let cleaned = strip_code_comments(text)
    }
    make_tok("Str", cleaned)
}

// scan_interp_string - scan a string literal that may contain ${expr}
// interpolations. `start` is the index just after the opening `"`.
// Returns { "tokens": tokens_to_inject, "pos": index_after_closing_quote }.
// If the input ends before a closing quote, scanning simply stops there.
//
// A literal without any ${} produces exactly one Str token (possibly empty).
// An interpolated literal is desugared into a flat concat sequence:
//
//   "hello ${name}!"
//     => Str("hello ") Plus LParen <tokens for name> RParen Plus Str("!")
//
// Each expression is re-lexed with lex() and wrapped in LParen/RParen so that
// operators inside ${} stay grouped. An interpolation whose re-lexed token
// list has at most one entry (e.g. `${}`) becomes Str(""). Empty literal
// pieces next to an interpolation are omitted, so a string made of a single
// ${expr} yields only the parenthesised expression tokens and no Str token.
// NOTE(review): the sequence as a whole is not parenthesised - confirm that
// no caller places a tighter-binding operator directly next to the literal.
//
// Escapes: a backslash followed by n, t or r appends newline, tab or carriage
// return. A backslash followed by any other char (including `"`, `\` and `$`)
// appends that char as is, which is how `\$` prevents `${` from starting an
// interpolation. A backslash that is the very last char of the input is dropped.
// The expression text is delimited by scan_interp_brace, which counts braces
// only; per the original author's note, nested quotes inside ${} are not
// supported.
fn scan_interp_string(chars: [String], start: Int, total: Int) -> Map<String, Any> {
    let pos = start
    let emitted: [Map<String, Any>] = native_list_empty()
    let literal: [String] = native_list_empty()
    let saw_interp = false
    let join_pending = false
    let scanning = true

    while scanning {
        if pos >= total {
            // Input exhausted before a closing quote.
            let scanning = false
        } else {
            let ch: String = native_list_get(chars, pos)

            if ch == "\\" {
                let after = pos + 1
                if after < total {
                    let esc: String = native_list_get(chars, after)
                    // Only n, t and r decode to a different char; every other
                    // escaped char stands for itself.
                    let decoded = esc
                    if esc == "n" {
                        let decoded = "\n"
                    }
                    if esc == "t" {
                        let decoded = "\t"
                    }
                    if esc == "r" {
                        let decoded = "\r"
                    }
                    let literal = native_list_append(literal, decoded)
                    let pos = after + 1
                } else {
                    // Backslash is the last char of the input: skip it.
                    let pos = pos + 1
                }
            } else {
                if ch == "\"" {
                    // Closing quote: consume it and finish.
                    let pos = pos + 1
                    let scanning = false
                } else {
                    // An interpolation starts only at `$` immediately followed by `{`.
                    let opens_interp = false
                    if ch == "$" {
                        let after = pos + 1
                        if after < total {
                            let peek: String = native_list_get(chars, after)
                            if peek == "{" {
                                let opens_interp = true
                            }
                        }
                    }

                    if opens_interp {
                        // Emit the literal gathered so far, unless it is empty.
                        let pending_len: Int = native_list_len(literal)
                        if pending_len > 0 {
                            if join_pending {
                                let emitted = native_list_append(emitted, make_tok("Plus", "+"))
                            }
                            let emitted = native_list_append(emitted, interp_literal_tok(str_join(literal, "")))
                            let join_pending = true
                        }
                        let literal = native_list_empty()
                        let saw_interp = true

                        // Expression text begins after the two chars `${`.
                        let scanned = scan_interp_brace(chars, pos + 2, total)
                        let expr_text: String = scanned["text"]
                        let resume_at: Int = scanned["pos"]
                        let pos = resume_at

                        let expr_toks: [Map<String, Any>] = lex(expr_text)
                        let expr_count: Int = native_list_len(expr_toks)

                        if join_pending {
                            let emitted = native_list_append(emitted, make_tok("Plus", "+"))
                        }
                        if expr_count > 1 {
                            // Parenthesise so operators inside ${} do not mix
                            // with the surrounding concat Plus tokens.
                            let emitted = native_list_append(emitted, make_tok("LParen", "("))
                            let emitted = interp_tokens_append_all(emitted, expr_toks)
                            let emitted = native_list_append(emitted, make_tok("RParen", ")"))
                        } else {
                            // Nothing but the Eof sentinel: empty string segment.
                            let emitted = native_list_append(emitted, make_tok("Str", ""))
                        }
                        let join_pending = true
                    } else {
                        // Ordinary char, or a `$` that is not followed by `{`.
                        let literal = native_list_append(literal, ch)
                        let pos = pos + 1
                    }
                }
            }
        }
    }

    // Emit whatever literal text is left over.
    let tail_len: Int = native_list_len(literal)
    let tail_text = str_join(literal, "")
    if saw_interp {
        // Interpolated string: an empty trailing piece is omitted.
        if tail_len > 0 {
            if join_pending {
                let emitted = native_list_append(emitted, make_tok("Plus", "+"))
            }
            let emitted = native_list_append(emitted, interp_literal_tok(tail_text))
        }
    } else {
        // Plain string: always exactly one Str token, even when empty.
        let emitted = native_list_append(emitted, interp_literal_tok(tail_text))
    }

    { "tokens": emitted, "pos": pos }
}
|
||||
|
||||
// -- Main lexer ----------------------------------------------------------------
|
||||
|
||||
fn lex(source: String) -> [Map<String, Any>] {
|
||||
@@ -505,20 +737,16 @@ fn lex(source: String) -> [Map<String, Any>] {
|
||||
let i = i + 1
|
||||
}
|
||||
} else {
|
||||
// String literal
|
||||
// String literal (plain or interpolated with ${expr} syntax).
|
||||
// scan_interp_string handles both cases: plain strings emit a
|
||||
// single Str token; interpolated strings emit a flat token
|
||||
// sequence (Str Plus expr-tokens Plus Str ...) that the parser
|
||||
// naturally assembles into a BinOp concat tree.
|
||||
if ch == "\"" {
|
||||
let result = scan_string(chars, i + 1, total)
|
||||
let str_text: String = result["text"]
|
||||
let new_pos: Int = result["pos"]
|
||||
// Compile-time scrub: strings that embed JS or CSS get
|
||||
// their // line comments and /* block comments stripped
|
||||
// before the token reaches the parser. Plain prose passes
|
||||
// through untouched.
|
||||
let clean_text = str_text
|
||||
if looks_like_code(str_text) {
|
||||
let clean_text = strip_code_comments(str_text)
|
||||
}
|
||||
let tokens = native_list_append(tokens, make_tok("Str", clean_text))
|
||||
let interp_result = scan_interp_string(chars, i + 1, total)
|
||||
let interp_toks: [Map<String, Any>] = interp_result["tokens"]
|
||||
let new_pos: Int = interp_result["pos"]
|
||||
let tokens = interp_tokens_append_all(tokens, interp_toks)
|
||||
let i = new_pos
|
||||
} else {
|
||||
// Number literal
|
||||
|
||||
Reference in New Issue
Block a user