round-3-gamma: combine c_escape + scan_interp_string batching — max round-3 savings
Combines two orthogonal optimizations:
1. c_escape batching (from alpha): runs of plain ASCII bytes are emitted as a single str_slice segment instead of one str_char_at string per byte. This reduces allocations from O(N) to O(K), where K is the number of special characters.
2. scan_interp_string batching (from beta): characters are dispatched via str_char_code (Int), and clean_start tracking flushes plain runs as str_slice segments. This eliminates per-character string allocations in the string-literal scanning hot path.
Result on web/src/main.el: peak RSS drops from 14.5MB to 13.4MB (-7.6%). Self-hosting: PASS.
This commit is contained in:
@@ -38,10 +38,13 @@ fn is_hex_digit_byte(b: Int) -> Bool {
|
||||
}
|
||||
|
||||
fn c_escape(s: String) -> String {
|
||||
// Use index-based byte scanning via str_char_code(s, i) and str_char_at(s, i).
|
||||
// This avoids native_string_chars + str_join, which corrupts high-byte (>= 0x80)
|
||||
// characters because list_join's looks_like_string heuristic rejects strings
|
||||
// whose first byte is >= 0x7F and emits them as decimal pointer values instead.
|
||||
// Batch ASCII chars using str_slice instead of str_char_at per byte.
|
||||
// Track clean_start: the beginning of the current run of bytes that need
|
||||
// no escaping. On each special byte, flush the accumulated clean run via
|
||||
// str_slice, then append the escape. This reduces parts-list appends from
|
||||
// O(N) to O(K) where K = number of special bytes << N for normal strings.
|
||||
//
|
||||
// Special bytes: '"'=34, '\\'=92, '\n'=10, '\r'=13, '\t'=9, any byte>=128.
|
||||
//
|
||||
// IMPORTANT: after a \xNN hex escape, if the next byte is a hex digit
|
||||
// (0-9, a-f, A-F), we emit `""` to split the C string literal so the C
|
||||
@@ -51,46 +54,75 @@ fn c_escape(s: String) -> String {
|
||||
let total: Int = str_len(s)
|
||||
let parts: [String] = native_list_empty()
|
||||
let i: Int = 0
|
||||
let clean_start: Int = 0
|
||||
let prev_was_hex_escape: Bool = false
|
||||
while i < total {
|
||||
let bval: Int = str_char_code(s, i)
|
||||
// If the previous token was a \xNN escape and the current byte is a
|
||||
// hex digit, insert an empty string literal ("") to break the escape.
|
||||
// Handle the hex-escape split case first: if prev was \xNN and this
|
||||
// byte is a hex digit, we must flush the clean run and insert "".
|
||||
// (At this point clean_start == i since the previous special byte
|
||||
// already reset it, so flush is a no-op unless something is pending.)
|
||||
if prev_was_hex_escape {
|
||||
if is_hex_digit_byte(bval) {
|
||||
// Flush any accumulated clean bytes before the split marker.
|
||||
if clean_start < i {
|
||||
let parts = native_list_append(parts, str_slice(s, clean_start, i))
|
||||
}
|
||||
let parts = native_list_append(parts, "\"\"")
|
||||
let clean_start = i
|
||||
}
|
||||
}
|
||||
let prev_was_hex_escape = false
|
||||
if bval == 34 {
|
||||
// 34 = '"'
|
||||
// 34 = '"' — flush clean run, then escape
|
||||
if clean_start < i {
|
||||
let parts = native_list_append(parts, str_slice(s, clean_start, i))
|
||||
}
|
||||
let parts = native_list_append(parts, "\\\"")
|
||||
let clean_start = i + 1
|
||||
} else {
|
||||
if bval == 92 {
|
||||
// 92 = '\\'
|
||||
if clean_start < i {
|
||||
let parts = native_list_append(parts, str_slice(s, clean_start, i))
|
||||
}
|
||||
let parts = native_list_append(parts, "\\\\")
|
||||
let clean_start = i + 1
|
||||
} else {
|
||||
if bval == 10 {
|
||||
// 10 = '\n'
|
||||
if clean_start < i {
|
||||
let parts = native_list_append(parts, str_slice(s, clean_start, i))
|
||||
}
|
||||
let parts = native_list_append(parts, "\\n")
|
||||
let clean_start = i + 1
|
||||
} else {
|
||||
if bval == 13 {
|
||||
// 13 = '\r'
|
||||
if clean_start < i {
|
||||
let parts = native_list_append(parts, str_slice(s, clean_start, i))
|
||||
}
|
||||
let parts = native_list_append(parts, "\\r")
|
||||
let clean_start = i + 1
|
||||
} else {
|
||||
if bval == 9 {
|
||||
// 9 = '\t'
|
||||
if clean_start < i {
|
||||
let parts = native_list_append(parts, str_slice(s, clean_start, i))
|
||||
}
|
||||
let parts = native_list_append(parts, "\\t")
|
||||
let clean_start = i + 1
|
||||
} else {
|
||||
if bval >= 128 {
|
||||
// Escape non-ASCII bytes (>= 0x80) as \xNN so
|
||||
// Clang does not misinterpret multi-byte UTF-8
|
||||
// sequences in C string literals.
|
||||
// Non-ASCII: flush, then \xNN
|
||||
if clean_start < i {
|
||||
let parts = native_list_append(parts, str_slice(s, clean_start, i))
|
||||
}
|
||||
let parts = native_list_append(parts, "\\x" + byte_to_hex2(bval))
|
||||
let prev_was_hex_escape = true
|
||||
} else {
|
||||
let parts = native_list_append(parts, str_char_at(s, i))
|
||||
let clean_start = i + 1
|
||||
}
|
||||
// else: plain ASCII — extends the current clean run (no append)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -98,6 +130,10 @@ fn c_escape(s: String) -> String {
|
||||
}
|
||||
let i = i + 1
|
||||
}
|
||||
// Flush the final clean run if any
|
||||
if clean_start < total {
|
||||
let parts = native_list_append(parts, str_slice(s, clean_start, total))
|
||||
}
|
||||
str_join(parts, "")
|
||||
}
|
||||
|
||||
|
||||
@@ -555,10 +555,17 @@ fn interp_tokens_append_all(dst: [Any], src: [Any]) -> [Any] {
|
||||
//
|
||||
// Supported escape sequences: \" \n \t \r \\ \$ (literal dollar sign).
|
||||
// Nested quotes inside ${} are not supported; use a variable instead.
|
||||
//
|
||||
// Performance: uses str_char_code (Int) for all character dispatch, eliminating
|
||||
// per-character strdup. Plain runs are batched into str_slice segments instead
|
||||
// of accumulating single-char strings, reducing list appends from O(N) to O(K)
|
||||
// where K = number of escape/special chars in the literal.
|
||||
// Char codes: '\' = 92, '"' = 34, '$' = 36, '{' = 123
|
||||
fn scan_interp_string(src: String, start: Int, total: Int) -> Map<String, Any> {
|
||||
let i = start
|
||||
let out_tokens: [Any] = native_list_empty()
|
||||
let cur_part: [String] = native_list_empty()
|
||||
let cur_parts: [String] = native_list_empty()
|
||||
let clean_start = start
|
||||
let has_interp = false
|
||||
let need_plus = false
|
||||
let running = true
|
||||
@@ -567,39 +574,55 @@ fn scan_interp_string(src: String, start: Int, total: Int) -> Map<String, Any> {
|
||||
if i >= total {
|
||||
let running = false
|
||||
} else {
|
||||
let ch: String = str_char_at(src, i)
|
||||
let c: Int = str_char_code(src, i)
|
||||
|
||||
if ch == "\\" {
|
||||
// Escape sequence
|
||||
if c == 92 {
|
||||
// '\\' = 92 — escape sequence: flush clean run, append resolved char
|
||||
if clean_start < i {
|
||||
let cur_parts = native_list_append(cur_parts, str_slice(src, clean_start, i))
|
||||
}
|
||||
let next_i = i + 1
|
||||
if next_i < total {
|
||||
let next_ch: String = str_char_at(src, next_i)
|
||||
if next_ch == "$" {
|
||||
// \$ => literal '$' (escape for interpolation syntax)
|
||||
let cur_part = native_list_append(cur_part, "$")
|
||||
let nc: Int = str_char_code(src, next_i)
|
||||
if nc == 36 {
|
||||
// '\$' => literal '$' (36 = '$')
|
||||
let cur_parts = native_list_append(cur_parts, "$")
|
||||
let clean_start = next_i + 1
|
||||
let i = next_i + 1
|
||||
} else {
|
||||
if next_ch == "\"" {
|
||||
let cur_part = native_list_append(cur_part, "\"")
|
||||
if nc == 34 {
|
||||
// '\"' => literal '"' (34 = '"')
|
||||
let cur_parts = native_list_append(cur_parts, "\"")
|
||||
let clean_start = next_i + 1
|
||||
let i = next_i + 1
|
||||
} else {
|
||||
if next_ch == "n" {
|
||||
let cur_part = native_list_append(cur_part, "\n")
|
||||
if nc == 110 {
|
||||
// '\n' (110 = 'n')
|
||||
let cur_parts = native_list_append(cur_parts, "\n")
|
||||
let clean_start = next_i + 1
|
||||
let i = next_i + 1
|
||||
} else {
|
||||
if next_ch == "t" {
|
||||
let cur_part = native_list_append(cur_part, "\t")
|
||||
if nc == 116 {
|
||||
// '\t' (116 = 't')
|
||||
let cur_parts = native_list_append(cur_parts, "\t")
|
||||
let clean_start = next_i + 1
|
||||
let i = next_i + 1
|
||||
} else {
|
||||
if next_ch == "r" {
|
||||
let cur_part = native_list_append(cur_part, "\r")
|
||||
if nc == 114 {
|
||||
// '\r' (114 = 'r')
|
||||
let cur_parts = native_list_append(cur_parts, "\r")
|
||||
let clean_start = next_i + 1
|
||||
let i = next_i + 1
|
||||
} else {
|
||||
if next_ch == "\\" {
|
||||
let cur_part = native_list_append(cur_part, "\\")
|
||||
if nc == 92 {
|
||||
// '\\' (92)
|
||||
let cur_parts = native_list_append(cur_parts, "\\")
|
||||
let clean_start = next_i + 1
|
||||
let i = next_i + 1
|
||||
} else {
|
||||
let cur_part = native_list_append(cur_part, next_ch)
|
||||
// Unknown escape: emit the escaped char verbatim
|
||||
let cur_parts = native_list_append(cur_parts, str_slice(src, next_i, next_i + 1))
|
||||
let clean_start = next_i + 1
|
||||
let i = next_i + 1
|
||||
}
|
||||
}
|
||||
@@ -608,29 +631,38 @@ fn scan_interp_string(src: String, start: Int, total: Int) -> Map<String, Any> {
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let i = i + 1
|
||||
let clean_start = next_i
|
||||
let i = next_i
|
||||
}
|
||||
} else {
|
||||
if ch == "\"" {
|
||||
// Closing quote - stop scanning
|
||||
if c == 34 {
|
||||
// '"' = 34 — closing quote: flush clean run, stop
|
||||
if clean_start < i {
|
||||
let cur_parts = native_list_append(cur_parts, str_slice(src, clean_start, i))
|
||||
}
|
||||
let i = i + 1
|
||||
let clean_start = i
|
||||
let running = false
|
||||
} else {
|
||||
if ch == "$" {
|
||||
// Check for ${ (start of interpolation)
|
||||
if c == 36 {
|
||||
// '$' = 36 — possible interpolation start
|
||||
let next_i = i + 1
|
||||
let is_interp = false
|
||||
if next_i < total {
|
||||
let next_ch: String = str_char_at(src, next_i)
|
||||
if next_ch == "{" {
|
||||
let nc2: Int = str_char_code(src, next_i)
|
||||
if nc2 == 123 {
|
||||
// '{' = 123
|
||||
let is_interp = true
|
||||
}
|
||||
}
|
||||
if is_interp {
|
||||
// Flush the accumulated literal part (if non-empty)
|
||||
let part_len: Int = native_list_len(cur_part)
|
||||
if clean_start < i {
|
||||
let cur_parts = native_list_append(cur_parts, str_slice(src, clean_start, i))
|
||||
}
|
||||
let part_len: Int = native_list_len(cur_parts)
|
||||
if part_len > 0 {
|
||||
let part_text = str_join(cur_part, "")
|
||||
let part_text = str_join(cur_parts, "")
|
||||
if need_plus {
|
||||
let out_tokens = tok_append(out_tokens, "Plus", "+")
|
||||
}
|
||||
@@ -641,7 +673,7 @@ fn scan_interp_string(src: String, start: Int, total: Int) -> Map<String, Any> {
|
||||
let out_tokens = tok_append(out_tokens, "Str", clean_part)
|
||||
let need_plus = true
|
||||
}
|
||||
let cur_part = native_list_empty()
|
||||
let cur_parts = native_list_empty()
|
||||
let has_interp = true
|
||||
|
||||
// Scan brace-balanced expression source
|
||||
@@ -649,6 +681,7 @@ fn scan_interp_string(src: String, start: Int, total: Int) -> Map<String, Any> {
|
||||
let expr_src: String = brace_result["text"]
|
||||
let new_i: Int = brace_result["pos"]
|
||||
let i = new_i
|
||||
let clean_start = new_i
|
||||
|
||||
// Re-lex the expression and inline the tokens.
|
||||
// Wrap in ( ) so that operators inside ${} (e.g.
|
||||
@@ -672,12 +705,11 @@ fn scan_interp_string(src: String, start: Int, total: Int) -> Map<String, Any> {
|
||||
}
|
||||
let need_plus = true
|
||||
} else {
|
||||
// Plain '$' not followed by '{' - treat as literal
|
||||
let cur_part = native_list_append(cur_part, "$")
|
||||
// Plain '$' not followed by '{' - treat as literal, continue clean run
|
||||
let i = i + 1
|
||||
}
|
||||
} else {
|
||||
let cur_part = native_list_append(cur_part, ch)
|
||||
// Plain char — extends clean run, no append needed
|
||||
let i = i + 1
|
||||
}
|
||||
}
|
||||
@@ -686,8 +718,11 @@ fn scan_interp_string(src: String, start: Int, total: Int) -> Map<String, Any> {
|
||||
}
|
||||
|
||||
// Flush remaining literal segment and build final token list
|
||||
let part_text = str_join(cur_part, "")
|
||||
let part_len: Int = native_list_len(cur_part)
|
||||
if clean_start < i {
|
||||
let cur_parts = native_list_append(cur_parts, str_slice(src, clean_start, i))
|
||||
}
|
||||
let part_len: Int = native_list_len(cur_parts)
|
||||
let part_text = str_join(cur_parts, "")
|
||||
if has_interp {
|
||||
// Interpolated string: only emit trailing segment if non-empty
|
||||
if part_len > 0 {
|
||||
|
||||
Reference in New Issue
Block a user