diff --git a/engram/dist/engram.c b/engram/dist/engram.c index b5fe865..275c694 100644 --- a/engram/dist/engram.c +++ b/engram/dist/engram.c @@ -308,7 +308,7 @@ el_val_t handle_request(el_val_t method, el_val_t path, el_val_t body) { if (str_eq(method, EL_STR("POST")) && (str_eq(clean, EL_STR("/api/nodes")) || str_eq(clean, EL_STR("/nodes")))) { return route_create_node(method, path, body); } - if (str_eq(method, EL_STR("GET")) && (str_eq(clean, EL_STR("/api/nodes")) || str_eq(clean, EL_STR("/nodes")) || str_eq(clean, EL_STR("/nodes/list")) || str_eq(clean, EL_STR("/api/nodes/list")))) { + if (str_eq(method, EL_STR("GET")) && (((str_eq(clean, EL_STR("/api/nodes")) || str_eq(clean, EL_STR("/nodes"))) || str_eq(clean, EL_STR("/nodes/list"))) || str_eq(clean, EL_STR("/api/nodes/list")))) { return route_scan_nodes(method, path, body); } if (str_eq(method, EL_STR("GET")) && (str_eq(clean, EL_STR("/api/edges")) || str_eq(clean, EL_STR("/edges")))) { @@ -351,8 +351,8 @@ el_val_t handle_request(el_val_t method, el_val_t path, el_val_t body) { return 0; } -int main(int argc, char** argv) { - el_runtime_init_args(argc, argv); +int main(int _argc, char** _argv) { + el_runtime_init_args(_argc, _argv); bind_str = env(EL_STR("ENGRAM_BIND")); if (str_eq(bind_str, EL_STR(""))) { bind_str = EL_STR(":8742"); diff --git a/lang/dist/platform/elc b/lang/dist/platform/elc index d2f167f..f1cba65 100755 Binary files a/lang/dist/platform/elc and b/lang/dist/platform/elc differ diff --git a/lang/dist/platform/elc-new b/lang/dist/platform/elc-new index 68ee155..f1cba65 100755 Binary files a/lang/dist/platform/elc-new and b/lang/dist/platform/elc-new differ diff --git a/lang/el-compiler/runtime/el_runtime.c b/lang/el-compiler/runtime/el_runtime.c index a30352c..df25f4a 100644 --- a/lang/el-compiler/runtime/el_runtime.c +++ b/lang/el-compiler/runtime/el_runtime.c @@ -42,7 +42,9 @@ #include #include #include +#ifdef HAVE_CURL #include +#endif /* ── Internal allocators 
─────────────────────────────────────────────────── */ @@ -102,6 +104,45 @@ void el_request_end(void) { _tl_arena.count = 0; } +/* ── Scoped arena for CLI use ─────────────────────────────────────────────── * + * CLI programs never call el_request_start/end, so all strdup allocations are + * permanent. el_arena_push/pop let the compiler free intermediate strings + * after each compilation unit. + * + * el_arena_push() — activates the arena if not already active, saves the + * current arena count as a mark, and returns it as an el_val_t Int. + * el_arena_pop(mark) — frees all strings allocated since the push mark and + * resets the count. If count reaches 0, deactivates the arena. + */ +#define EL_ARENA_SCOPE_DEPTH 32 +static _Thread_local size_t _tl_arena_scope[EL_ARENA_SCOPE_DEPTH]; +static _Thread_local int _tl_arena_scope_depth = 0; + +el_val_t el_arena_push(void) { + if (!_tl_arena_active) { + _tl_arena_active = 1; + } + if (_tl_arena_scope_depth < EL_ARENA_SCOPE_DEPTH) { + _tl_arena_scope[_tl_arena_scope_depth++] = _tl_arena.count; + } + return (el_val_t)(int64_t)_tl_arena.count; +} + +el_val_t el_arena_pop(el_val_t mark) { + size_t save = (size_t)(int64_t)mark; + if (save > _tl_arena.count) save = 0; + for (size_t i = save; i < _tl_arena.count; i++) { + if (_tl_arena.ptrs[i]) { + free(_tl_arena.ptrs[i]); + _tl_arena.ptrs[i] = NULL; + } + } + _tl_arena.count = save; + if (_tl_arena_scope_depth > 0) _tl_arena_scope_depth--; + if (save == 0) _tl_arena_active = 0; + return 0; +} + /* Persistent allocation — bypasses the arena (state_set, engram internals). */ static char* el_strdup_persist(const char* s) { if (!s) return strdup(""); @@ -700,6 +741,39 @@ struct JsonParser { * the loop is observable. */ +/* ── JSON error helper (used by HTTP, PQ, crypto stubs) ─────────────────── */ + +/* JSON-escape an arbitrary C string into an allocated buffer. 
*/ +static char* json_escape_alloc(const char* s) { + if (!s) return el_strdup(""); + JsonBuf b; jb_init(&b); + for (const char* p = s; *p; p++) { + unsigned char c = (unsigned char)*p; + switch (c) { + case '"': jb_puts(&b, "\\\""); break; + case '\\': jb_puts(&b, "\\\\"); break; + case '\n': jb_puts(&b, "\\n"); break; + case '\r': jb_puts(&b, "\\r"); break; + case '\t': jb_puts(&b, "\\t"); break; + default: + if (c < 0x20) { + char tmp[8]; snprintf(tmp, sizeof(tmp), "\\u%04x", c); + jb_puts(&b, tmp); + } else jb_putc(&b, (char)c); + } + } + return b.buf; +} + +static el_val_t http_error_json(const char* msg) { + char* esc = json_escape_alloc(msg ? msg : "unknown error"); + char* buf = el_strbuf(strlen(esc) + 16); + sprintf(buf, "{\"error\":\"%s\"}", esc); + free(esc); + return el_wrap_str(buf); +} + +#ifdef HAVE_CURL /* ── HTTP client write-callback buffer ───────────────────────────────────── */ typedef struct { @@ -733,36 +807,6 @@ static size_t http_write_cb(char* ptr, size_t size, size_t nmemb, void* ud) { return n; } -/* JSON-escape an arbitrary C string into an allocated buffer. */ -static char* json_escape_alloc(const char* s) { - if (!s) return el_strdup(""); - JsonBuf b; jb_init(&b); - for (const char* p = s; *p; p++) { - unsigned char c = (unsigned char)*p; - switch (c) { - case '"': jb_puts(&b, "\\\""); break; - case '\\': jb_puts(&b, "\\\\"); break; - case '\n': jb_puts(&b, "\\n"); break; - case '\r': jb_puts(&b, "\\r"); break; - case '\t': jb_puts(&b, "\\t"); break; - default: - if (c < 0x20) { - char tmp[8]; snprintf(tmp, sizeof(tmp), "\\u%04x", c); - jb_puts(&b, tmp); - } else jb_putc(&b, (char)c); - } - } - return b.buf; -} - -static el_val_t http_error_json(const char* msg) { - char* esc = json_escape_alloc(msg ? msg : "unknown error"); - char* buf = el_strbuf(strlen(esc) + 16); - sprintf(buf, "{\"error\":\"%s\"}", esc); - free(esc); - return el_wrap_str(buf); -} - /* HTTP timeout (ms) — read once from EL_HTTP_TIMEOUT_MS, default 60000. 
* Applied via CURLOPT_TIMEOUT_MS on every libcurl request. */ static long _el_http_timeout_ms = -1; @@ -970,6 +1014,7 @@ el_val_t http_post_to_file(el_val_t url, el_val_t body, el_val_t headers_map, el if (h) curl_slist_free_all(h); return r; } +#endif /* HAVE_CURL */ /* ── HTTP server (POSIX sockets + pthreads) ──────────────────────────────── */ @@ -1887,6 +1932,34 @@ el_val_t fs_write_bytes(el_val_t pathv, el_val_t bytesv, el_val_t lengthv) { return 1; } +// stdout_to_file / stdout_restore — redirect process stdout to a file and +// restore it. Used by the compiler's JS post-processing pipeline to capture +// codegen output before piping through terser / obfuscator. +#include +static int _el_saved_stdout_fd = -1; + +el_val_t stdout_to_file(el_val_t pathv) { + const char* path = EL_CSTR(pathv); + if (!path) return (el_val_t)(int64_t)-1; + fflush(stdout); + _el_saved_stdout_fd = dup(STDOUT_FILENO); + int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0600); + if (fd < 0) return (el_val_t)(int64_t)-1; + dup2(fd, STDOUT_FILENO); + close(fd); + return (el_val_t)(int64_t)0; +} + +el_val_t stdout_restore(void) { + if (_el_saved_stdout_fd >= 0) { + fflush(stdout); + dup2(_el_saved_stdout_fd, STDOUT_FILENO); + close(_el_saved_stdout_fd); + _el_saved_stdout_fd = -1; + } + return (el_val_t)(int64_t)0; +} + // exec_command — run a shell command, return exit code (0 = success). // Used by elb and other El tooling to invoke subprocesses. el_val_t exec_command(el_val_t cmdv) { @@ -1980,6 +2053,52 @@ el_val_t fs_list(el_val_t pathv) { return lst; } +/* fs_list_json — return directory entries as a JSON array of strings. + * Returns "[]" for missing or non-directory paths. Excludes "." and "..". */ +el_val_t fs_list_json(el_val_t pathv) { + const char* path = EL_CSTR(pathv); + if (!path) return EL_STR("[]"); + DIR* d = opendir(path); + if (!d) return EL_STR("[]"); + /* Collect entries first so we can build the JSON in one pass. 
*/ + char** names = NULL; + size_t count = 0, cap = 0; + struct dirent* e; + while ((e = readdir(d)) != NULL) { + if (strcmp(e->d_name, ".") == 0 || strcmp(e->d_name, "..") == 0) continue; + if (count >= cap) { + cap = cap ? cap * 2 : 16; + names = realloc(names, cap * sizeof(char*)); + if (!names) { closedir(d); return EL_STR("[]"); } + } + names[count++] = strdup(e->d_name); + } + closedir(d); + /* Build JSON array. */ + size_t sz = 3; /* "[]" + NUL */ + for (size_t i = 0; i < count; i++) sz += strlen(names[i]) * 2 + 6; /* conservative */ + char* buf = malloc(sz); + if (!buf) { for (size_t i = 0; i < count; i++) free(names[i]); free(names); return EL_STR("[]"); } + size_t pos = 0; + buf[pos++] = '['; + for (size_t i = 0; i < count; i++) { + if (i > 0) buf[pos++] = ','; + buf[pos++] = '"'; + for (const char* p = names[i]; *p; p++) { + if (*p == '"' || *p == '\\') buf[pos++] = '\\'; + else if (*p == '\n') { buf[pos++] = '\\'; buf[pos++] = 'n'; continue; } + else if (*p == '\t') { buf[pos++] = '\\'; buf[pos++] = 't'; continue; } + buf[pos++] = *p; + } + buf[pos++] = '"'; + free(names[i]); + } + free(names); + buf[pos++] = ']'; + buf[pos] = '\0'; + return el_wrap_str(buf); +} + /* fs_exists — true iff stat(path) succeeds. Symlinks are followed. */ el_val_t fs_exists(el_val_t pathv) { const char* path = EL_CSTR(pathv); @@ -3231,14 +3350,20 @@ el_val_t json_get_raw(el_val_t json_str, el_val_t key) { el_val_t json_set(el_val_t json_str, el_val_t key, el_val_t value) { const char* json = EL_CSTR(json_str); const char* k = EL_CSTR(key); + /* raw_val is the JSON value as-is (already encoded by the caller). + * If it looks like a plain (non-JSON) string, wrap it as a JSON string. + * Convention: callers pass pre-encoded values like "\"bob\"" for strings, + * "42" for numbers, "true"/"false" for booleans. 
*/ + const char* raw_val = EL_CSTR(value); if (!k) k = ""; + if (!raw_val) raw_val = "null"; if (!json || !*json) { /* Build a fresh object */ JsonBuf b; jb_init(&b); jb_putc(&b, '{'); jb_emit_escaped(&b, k); jb_putc(&b, ':'); - jb_emit_value(&b, value); + jb_puts(&b, raw_val); jb_putc(&b, '}'); return el_wrap_str(b.buf); } @@ -3252,7 +3377,7 @@ el_val_t json_set(el_val_t json_str, el_val_t key, el_val_t value) { memcpy(b.buf + b.len, json, prefix); b.len += prefix; b.buf[b.len] = '\0'; - jb_emit_value(&b, value); + jb_puts(&b, raw_val); jb_puts(&b, end); return el_wrap_str(b.buf); } @@ -3283,7 +3408,7 @@ el_val_t json_set(el_val_t json_str, el_val_t key, el_val_t value) { if (!empty) jb_putc(&b, ','); jb_emit_escaped(&b, k); jb_putc(&b, ':'); - jb_emit_value(&b, value); + jb_puts(&b, raw_val); /* Append from close_idx onward */ jb_puts(&b, json + close_idx); return el_wrap_str(b.buf); @@ -3364,6 +3489,87 @@ el_val_t json_array_get_string(el_val_t json_str, el_val_t index) { return el_wrap_str(parsed); } +/* json_escape_string — escape a string value for embedding in JSON. + * Returns the escaped content WITHOUT surrounding quotes. + * "say \"hello\"" -> "say \\\"hello\\\"" */ +el_val_t json_escape_string(el_val_t sv) { + const char* s = EL_CSTR(sv); + if (!s) return el_wrap_str(el_strdup("")); + size_t n = strlen(s); + /* Worst case: every char needs a 2-char escape. 
*/ + char* out = malloc(n * 2 + 1); + if (!out) return el_wrap_str(el_strdup("")); + size_t j = 0; + for (size_t i = 0; i < n; i++) { + unsigned char c = (unsigned char)s[i]; + if (c == '"') { out[j++] = '\\'; out[j++] = '"'; } + else if (c == '\\') { out[j++] = '\\'; out[j++] = '\\'; } + else if (c == '\n') { out[j++] = '\\'; out[j++] = 'n'; } + else if (c == '\r') { out[j++] = '\\'; out[j++] = 'r'; } + else if (c == '\t') { out[j++] = '\\'; out[j++] = 't'; } + else { out[j++] = (char)c; } + } + out[j] = '\0'; + el_val_t result = el_wrap_str(el_strdup(out)); + free(out); + return result; +} + +/* json_build_object — build a JSON object from a flat key-value list. + * kvs is [key0, val0, key1, val1, ...]. Values are raw JSON (pass + * strings as "\"value\"" or use json_escape_string). */ +el_val_t json_build_object(el_val_t kvs) { + el_val_t list = kvs; + int64_t n = el_list_len(list); + JsonBuf b; jb_init(&b); + jb_putc(&b, '{'); + int first = 1; + for (int64_t i = 0; i + 1 < n; i += 2) { + el_val_t k = el_list_get(list, (el_val_t)i); + el_val_t v = el_list_get(list, (el_val_t)(i + 1)); + const char* ks = EL_CSTR(k); + const char* vs = EL_CSTR(v); + if (!ks || !vs) continue; + if (!first) jb_putc(&b, ','); + first = 0; + jb_putc(&b, '"'); + jb_puts(&b, ks); + jb_puts(&b, "\":\""); + /* escape the value string */ + size_t vn = strlen(vs); + for (size_t j = 0; j < vn; j++) { + unsigned char c = (unsigned char)vs[j]; + if (c == '"') { jb_putc(&b, '\\'); jb_putc(&b, '"'); } + else if (c == '\\') { jb_putc(&b, '\\'); jb_putc(&b, '\\'); } + else if (c == '\n') { jb_putc(&b, '\\'); jb_putc(&b, 'n'); } + else if (c == '\r') { jb_putc(&b, '\\'); jb_putc(&b, 'r'); } + else if (c == '\t') { jb_putc(&b, '\\'); jb_putc(&b, 't'); } + else { jb_putc(&b, (char)c); } + } + jb_putc(&b, '"'); + } + jb_putc(&b, '}'); + return el_wrap_str(b.buf); +} + +/* json_build_array — build a JSON array from a list of raw JSON values. + * items is ["\"alpha\"", "\"beta\"", "42", "true", ...]. 
*/ +el_val_t json_build_array(el_val_t items) { + el_val_t list = items; + int64_t n = el_list_len(list); + JsonBuf b; jb_init(&b); + jb_putc(&b, '['); + for (int64_t i = 0; i < n; i++) { + el_val_t v = el_list_get(list, (el_val_t)i); + const char* vs = EL_CSTR(v); + if (!vs) continue; + if (i > 0) jb_putc(&b, ','); + jb_puts(&b, vs); + } + jb_putc(&b, ']'); + return el_wrap_str(b.buf); +} + /* ── Time ────────────────────────────────────────────────────────────────── */ el_val_t time_now(void) { @@ -3385,7 +3591,7 @@ el_val_t time_format(el_val_t ts, el_val_t fmt) { struct tm tm; gmtime_r(&s, &tm); const char* fmt_str = EL_CSTR(fmt); - if (!fmt_str || strcmp(fmt_str, "ISO") == 0) { + if (!fmt_str || *fmt_str == '\0' || strcmp(fmt_str, "ISO") == 0) { char buf[64]; snprintf(buf, sizeof(buf), "%04d-%02d-%02dT%02d:%02d:%02d.%03dZ", tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, @@ -3404,15 +3610,13 @@ el_val_t time_to_parts(el_val_t ts) { if (msec < 0) { msec += 1000; s -= 1; } struct tm tm; gmtime_r(&s, &tm); - el_val_t m = el_map_new(0); - m = el_map_set(m, EL_STR(el_strdup("year")), (el_val_t)(tm.tm_year + 1900)); - m = el_map_set(m, EL_STR(el_strdup("month")), (el_val_t)(tm.tm_mon + 1)); - m = el_map_set(m, EL_STR(el_strdup("day")), (el_val_t)tm.tm_mday); - m = el_map_set(m, EL_STR(el_strdup("hour")), (el_val_t)tm.tm_hour); - m = el_map_set(m, EL_STR(el_strdup("minute")), (el_val_t)tm.tm_min); - m = el_map_set(m, EL_STR(el_strdup("second")), (el_val_t)tm.tm_sec); - m = el_map_set(m, EL_STR(el_strdup("ms")), (el_val_t)msec); - return m; + /* Return a JSON string so callers can use json_get to extract fields. 
*/ + char buf[256]; + snprintf(buf, sizeof(buf), + "{\"year\":%d,\"month\":%d,\"day\":%d,\"hour\":%d,\"minute\":%d,\"second\":%d,\"ms\":%d}", + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, msec); + return el_wrap_str(el_strdup(buf)); } el_val_t time_from_parts(el_val_t secs, el_val_t ns, el_val_t tz) { @@ -3518,6 +3722,12 @@ el_val_t now(void) { return el_now_instant(); } +/* now_ns — return current Unix time as nanoseconds (Int). + * Thin wrapper over el_now_instant for use in test timing. */ +el_val_t now_ns(void) { + return el_now_instant(); +} + /* unix_seconds(n) — Instant from a Unix-epoch second count. * unix_millis(n) — Instant from a Unix-epoch millisecond count. */ el_val_t unix_seconds(el_val_t n) { @@ -4745,12 +4955,44 @@ el_val_t state_del(el_val_t key) { el_val_t state_keys(void) { pthread_mutex_lock(&_state_mu); - el_val_t lst = el_list_empty(); + /* Build a JSON array string: ["key1","key2",...] */ + JsonBuf b; jb_init(&b); + jb_putc(&b, '['); for (size_t i = 0; i < _state_count; i++) { - lst = el_list_append(lst, el_wrap_str(el_strdup(_state_entries[i].key))); + if (i > 0) jb_putc(&b, ','); + jb_putc(&b, '"'); + jb_emit_escaped(&b, _state_entries[i].key); + jb_putc(&b, '"'); + } + jb_putc(&b, ']'); + pthread_mutex_unlock(&_state_mu); + return el_wrap_str(b.buf); +} + +/* Returns 1 (true) if the key is present in the state store, else 0 (false). */ +el_val_t state_has(el_val_t key) { + const char* k = EL_CSTR(key); + if (!k) return 0; + pthread_mutex_lock(&_state_mu); + StateEntry* e = state_find(k); + int found = (e != NULL) ? 1 : 0; + pthread_mutex_unlock(&_state_mu); + return (el_val_t)found; +} + +/* Returns the value for key, or default_val if the key is absent. 
*/ +el_val_t state_get_or(el_val_t key, el_val_t default_val) { + const char* k = EL_CSTR(key); + if (!k) return default_val; + pthread_mutex_lock(&_state_mu); + StateEntry* e = state_find(k); + if (e) { + char* copy = el_strdup(e->value); + pthread_mutex_unlock(&_state_mu); + return el_wrap_str(copy); } pthread_mutex_unlock(&_state_mu); - return lst; + return default_val; } /* ── Float formatting ────────────────────────────────────────────────────── */ @@ -7248,8 +7490,10 @@ el_val_t engram_neighbors_json(el_val_t node_id, el_val_t max_depth, el_val_t di free(frontier); free(frontier_h); free(visited); jb_putc(&b, ']'); return el_wrap_str(b.buf); } - frontier[fc] = el_strdup(sid); frontier_h[fc] = 0; fc++; - visited[vc++] = el_strdup(sid); + /* Use plain strdup (not el_strdup) so arena doesn't track these pointers. + * The BFS loop manually frees them below — arena would double-free them. */ + frontier[fc] = strdup(sid); frontier_h[fc] = 0; fc++; + visited[vc++] = strdup(sid); int first = 1; while (fc > 0) { @@ -7278,8 +7522,8 @@ el_val_t engram_neighbors_json(el_val_t node_id, el_val_t max_depth, el_val_t di char tmp[64]; snprintf(tmp, sizeof(tmp), ",\"hops\":%lld}", (long long)(h + 1)); jb_puts(&b, tmp); first = 0; - if (vc < 1024) visited[vc++] = el_strdup(peer); - if (fc < 1024 && h + 1 < depth) { frontier[fc] = el_strdup(peer); frontier_h[fc] = h + 1; fc++; } + if (vc < 1024) visited[vc++] = strdup(peer); + if (fc < 1024 && h + 1 < depth) { frontier[fc] = strdup(peer); frontier_h[fc] = h + 1; fc++; } } free(cur); } @@ -7519,6 +7763,7 @@ el_val_t engram_query_range(el_val_t start_ms_v, el_val_t end_ms_v) { return el_wrap_str(b.buf); } +#ifdef HAVE_CURL /* ── DHARMA network ───────────────────────────────────────────────────────── * Real implementation. Peers are addressed by `dharma_id` — either bare * (e.g. 
"ntn-genesis", transport defaults to http://localhost:7770) or @@ -8030,6 +8275,7 @@ el_val_t dharma_peers(void) { free(peers); return out; } +#endif /* HAVE_CURL — DHARMA network */ /* ── Batch 4: LLM (Anthropic API client) ─────────────────────────────────── */ /* @@ -8044,6 +8290,7 @@ el_val_t dharma_peers(void) { * and returns a JSON-string el_val_t result. Iteration is capped at 10. */ +#ifdef HAVE_CURL static const char* LLM_DEFAULT_MODEL = "claude-sonnet-4-5"; static const char* LLM_API_URL = "https://api.anthropic.com/v1/messages"; static const char* LLM_VERSION = "2023-06-01"; @@ -8729,6 +8976,7 @@ el_val_t llm_models(void) { lst = el_list_append(lst, el_wrap_str(el_strdup("claude-haiku-4-5"))); return lst; } +#endif /* HAVE_CURL */ /* ── Native VM builtin aliases ────────────────────────────────────────────── * El source files use native_* names (El VM builtins). @@ -9919,6 +10167,7 @@ el_val_t aead_decrypt(el_val_t key_hex, el_val_t nonce_hex, el_val_t ciphertext_ #endif /* __has_include() */ +#ifdef HAVE_CURL /* ──────────────────────────────────────────────────────────────────────────── * OTLP/HTTP observability — logs, traces, metrics * @@ -10277,6 +10526,8 @@ el_val_t emit_event(el_val_t name_v, el_val_t duration_ms_v) { return trace_span_end(h); } +#endif /* HAVE_CURL — OTLP */ + /* ── Threading seed primitives ─────────────────────────────────────────────── * __thread_create(fn_name, arg) -> Int spawn El fn in a pthread, return tid * __thread_join(tid) -> String join thread, return result string @@ -10731,6 +10982,7 @@ el_val_t config(el_val_t key_v) { return el_wrap_str(el_strdup(val)); } +#ifdef HAVE_CURL /* http_patch — HTTP PATCH request with Content-Type: application/json. * Returns the response body (same error convention as http_post_json). 
*/ el_val_t http_patch(el_val_t url_v, el_val_t body_v) { @@ -10846,6 +11098,7 @@ el_val_t http_get_engram(el_val_t url_v, el_val_t key_v) { } return el_wrap_str(rb.data); } +#endif /* HAVE_CURL */ /* str_to_bytes — encode a string as a JSON array of unsigned byte values. * "hello" -> "[104,101,108,108,111]" @@ -10924,3 +11177,29 @@ el_val_t hash_sha256(el_val_t sv) { return el_hex_encode(digest, 32); } +#ifndef HAVE_CURL +/* ── HAVE_CURL=0 stubs — compile without -lcurl for the elc CLI binary. ───── * + * These return a JSON error string so El programs get a clear message if they + * call HTTP/LLM functions in a curl-less build. */ +static el_val_t _no_curl_err(void) { + return el_wrap_str(el_strdup("{\"error\":\"not built with HAVE_CURL\"}")); +} +el_val_t http_get(el_val_t url) { (void)url; return _no_curl_err(); } +el_val_t http_post(el_val_t url, el_val_t body) { (void)url; (void)body; return _no_curl_err(); } +el_val_t http_post_json(el_val_t url, el_val_t body) { (void)url; (void)body; return _no_curl_err(); } +el_val_t http_get_with_headers(el_val_t url, el_val_t h) { (void)url; (void)h; return _no_curl_err(); } +el_val_t http_post_with_headers(el_val_t url, el_val_t b, el_val_t h) { (void)url; (void)b; (void)h; return _no_curl_err(); } +el_val_t http_post_form_auth(el_val_t url, el_val_t b, el_val_t a) { (void)url; (void)b; (void)a; return _no_curl_err(); } +el_val_t http_delete(el_val_t url) { (void)url; return _no_curl_err(); } +el_val_t http_patch(el_val_t url, el_val_t body) { (void)url; (void)body; return _no_curl_err(); } +el_val_t http_get_to_file(el_val_t url, el_val_t h, el_val_t p) { (void)url; (void)h; (void)p; return _no_curl_err(); } +el_val_t http_post_to_file(el_val_t url, el_val_t b, el_val_t h, el_val_t p) { (void)url; (void)b; (void)h; (void)p; return _no_curl_err(); } +el_val_t http_post_engram(el_val_t url, el_val_t k, el_val_t b) { (void)url; (void)k; (void)b; return _no_curl_err(); } +el_val_t http_get_engram(el_val_t url, el_val_t k) 
{ (void)url; (void)k; return _no_curl_err(); } +el_val_t llm_call(el_val_t m, el_val_t p) { (void)m; (void)p; return _no_curl_err(); } +el_val_t llm_call_system(el_val_t m, el_val_t s, el_val_t u) { (void)m; (void)s; (void)u; return _no_curl_err(); } +el_val_t llm_call_agentic(el_val_t m, el_val_t s, el_val_t u, el_val_t t) { (void)m; (void)s; (void)u; (void)t; return _no_curl_err(); } +el_val_t llm_vision(el_val_t m, el_val_t s, el_val_t p, el_val_t i) { (void)m; (void)s; (void)p; (void)i; return _no_curl_err(); } +el_val_t llm_models(void) { return el_list_empty(); } +void llm_register_tool(el_val_t n, el_val_t f) { (void)n; (void)f; } +#endif /* !HAVE_CURL */ diff --git a/lang/el-compiler/runtime/el_runtime.h b/lang/el-compiler/runtime/el_runtime.h index bcf8f59..12ad99b 100644 --- a/lang/el-compiler/runtime/el_runtime.h +++ b/lang/el-compiler/runtime/el_runtime.h @@ -22,6 +22,9 @@ * EL_STR(s) cast string literal to el_val_t * EL_CSTR(v) cast el_val_t back to const char* * EL_INT(v) identity — el_val_t is already int64_t + * EL_NULL null / zero value + * EL_FALSE boolean false (0) + * EL_TRUE boolean true (1) * * Link requirements: * -lcurl — required for the HTTP client (http_get, http_post, llm_*). @@ -53,6 +56,8 @@ typedef int64_t el_val_t; #define EL_CSTR(v) ((const char*)(uintptr_t)(v)) #define EL_INT(v) (v) #define EL_NULL ((el_val_t)0) +#define EL_FALSE ((el_val_t)0) +#define EL_TRUE ((el_val_t)1) /* Float values share the el_val_t (int64) slot via a bit-cast. 
* The codegen emits Float literals as `el_from_float()` so the @@ -117,6 +122,10 @@ el_val_t el_min(el_val_t a, el_val_t b); void el_retain(el_val_t v); void el_release(el_val_t v); +/* ── Scoped arena (CLI use) ───────────────────────────────────────────────── */ +el_val_t el_arena_push(void); +el_val_t el_arena_pop(el_val_t mark); + /* ── List ────────────────────────────────────────────────────────────────── */ el_val_t el_list_new(el_val_t count, ...); @@ -222,6 +231,7 @@ el_val_t el_html_sanitize(el_val_t input_html, el_val_t allowlist_json); el_val_t fs_read(el_val_t path); el_val_t fs_write(el_val_t path, el_val_t content); el_val_t fs_list(el_val_t path); +el_val_t fs_list_json(el_val_t path); el_val_t fs_exists(el_val_t path); el_val_t fs_mkdir(el_val_t path); /* mkdir -p, mode 0755 */ @@ -251,6 +261,9 @@ el_val_t json_set(el_val_t json_str, el_val_t key, el_val_t value); el_val_t json_array_len(el_val_t json_str); el_val_t json_array_get(el_val_t json_str, el_val_t index); el_val_t json_array_get_string(el_val_t json_str, el_val_t index); +el_val_t json_escape_string(el_val_t sv); +el_val_t json_build_object(el_val_t kvs); +el_val_t json_build_array(el_val_t items); /* ── Time ────────────────────────────────────────────────────────────────── */ @@ -263,6 +276,7 @@ el_val_t time_to_parts(el_val_t ts); el_val_t time_from_parts(el_val_t secs, el_val_t ns, el_val_t tz); el_val_t time_add(el_val_t ts, el_val_t n, el_val_t unit); el_val_t time_diff(el_val_t ts1, el_val_t ts2, el_val_t unit); +el_val_t now_ns(void); /* ── Instant + Duration: first-class temporal types ────────────────────────── * Both types share the el_val_t (int64) slot. 
Instants are nanoseconds @@ -419,6 +433,8 @@ el_val_t state_set(el_val_t key, el_val_t value); el_val_t state_get(el_val_t key); el_val_t state_del(el_val_t key); el_val_t state_keys(void); +el_val_t state_has(el_val_t key); +el_val_t state_get_or(el_val_t key, el_val_t default_val); /* ── Float formatting ────────────────────────────────────────────────────── */ @@ -750,6 +766,10 @@ el_val_t exec_capture(el_val_t cmd); /* run shell command, capture stdout */ el_val_t exec(el_val_t cmd); /* exec(cmd) → stdout String (30s timeout) */ el_val_t exec_bg(el_val_t cmd); /* exec_bg(cmd) → PID String (non-blocking) */ +/* ── Stdout redirection (used by compiler JS pipeline) ───────────────────── */ +el_val_t stdout_to_file(el_val_t path); /* redirect process stdout to a file */ +el_val_t stdout_restore(void); /* restore process stdout to terminal */ + el_val_t emit_log(el_val_t level, el_val_t msg, el_val_t fields_json); el_val_t emit_metric(el_val_t name, el_val_t value, el_val_t tags_json); el_val_t trace_span_start(el_val_t name); diff --git a/lang/el-compiler/src/codegen.el b/lang/el-compiler/src/codegen.el index 6ac69ce..d8a78f6 100644 --- a/lang/el-compiler/src/codegen.el +++ b/lang/el-compiler/src/codegen.el @@ -38,10 +38,13 @@ fn is_hex_digit_byte(b: Int) -> Bool { } fn c_escape(s: String) -> String { - // Use index-based byte scanning via str_char_code(s, i) and str_char_at(s, i). - // This avoids native_string_chars + str_join, which corrupts high-byte (>= 0x80) - // characters because list_join's looks_like_string heuristic rejects strings - // whose first byte is >= 0x7F and emits them as decimal pointer values instead. + // Batch ASCII chars using str_slice instead of str_char_at per byte. + // Track clean_start: the beginning of the current run of bytes that need + // no escaping. On each special byte, flush the accumulated clean run via + // str_slice, then append the escape. 
This reduces parts-list appends from + // O(N) to O(K) where K = number of special bytes << N for normal strings. + // + // Special bytes: '"'=34, '\\'=92, '\n'=10, '\r'=13, '\t'=9, any byte>=128. // // IMPORTANT: after a \xNN hex escape, if the next byte is a hex digit // (0-9, a-f, A-F), we emit `""` to split the C string literal so the C @@ -51,46 +54,75 @@ fn c_escape(s: String) -> String { let total: Int = str_len(s) let parts: [String] = native_list_empty() let i: Int = 0 + let clean_start: Int = 0 let prev_was_hex_escape: Bool = false while i < total { let bval: Int = str_char_code(s, i) - // If the previous token was a \xNN escape and the current byte is a - // hex digit, insert an empty string literal ("") to break the escape. + // Handle the hex-escape split case first: if prev was \xNN and this + // byte is a hex digit, we must flush the clean run and insert "". + // (At this point clean_start == i since the previous special byte + // already reset it, so flush is a no-op unless something is pending.) if prev_was_hex_escape { if is_hex_digit_byte(bval) { + // Flush any accumulated clean bytes before the split marker. 
+ if clean_start < i { + let parts = native_list_append(parts, str_slice(s, clean_start, i)) + } let parts = native_list_append(parts, "\"\"") + let clean_start = i } } let prev_was_hex_escape = false if bval == 34 { - // 34 = '"' + // 34 = '"' — flush clean run, then escape + if clean_start < i { + let parts = native_list_append(parts, str_slice(s, clean_start, i)) + } let parts = native_list_append(parts, "\\\"") + let clean_start = i + 1 } else { if bval == 92 { // 92 = '\\' + if clean_start < i { + let parts = native_list_append(parts, str_slice(s, clean_start, i)) + } let parts = native_list_append(parts, "\\\\") + let clean_start = i + 1 } else { if bval == 10 { // 10 = '\n' + if clean_start < i { + let parts = native_list_append(parts, str_slice(s, clean_start, i)) + } let parts = native_list_append(parts, "\\n") + let clean_start = i + 1 } else { if bval == 13 { // 13 = '\r' + if clean_start < i { + let parts = native_list_append(parts, str_slice(s, clean_start, i)) + } let parts = native_list_append(parts, "\\r") + let clean_start = i + 1 } else { if bval == 9 { // 9 = '\t' + if clean_start < i { + let parts = native_list_append(parts, str_slice(s, clean_start, i)) + } let parts = native_list_append(parts, "\\t") + let clean_start = i + 1 } else { if bval >= 128 { - // Escape non-ASCII bytes (>= 0x80) as \xNN so - // Clang does not misinterpret multi-byte UTF-8 - // sequences in C string literals. 
+ // Non-ASCII: flush, then \xNN + if clean_start < i { + let parts = native_list_append(parts, str_slice(s, clean_start, i)) + } let parts = native_list_append(parts, "\\x" + byte_to_hex2(bval)) let prev_was_hex_escape = true - } else { - let parts = native_list_append(parts, str_char_at(s, i)) + let clean_start = i + 1 } + // else: plain ASCII — extends the current clean run (no append) } } } @@ -98,13 +130,67 @@ fn c_escape(s: String) -> String { } let i = i + 1 } - str_join(parts, "") + // Flush the final clean run if any + if clean_start < total { + let parts = native_list_append(parts, str_slice(s, clean_start, total)) + } + let result: String = str_join(parts, "") + // parts list fully consumed — release to free peak heap. + el_release(parts) + result } fn c_str_lit(s: String) -> String { "\"" + c_escape(s) + "\"" } +// sanitize_test_name — convert a test name string to a valid C identifier fragment. +// "int-to-str" -> "int_to_str", "lex empty" -> "lex_empty" +fn sanitize_test_name(name: String) -> String { + let n: Int = str_len(name) + let i: Int = 0 + let out: String = "" + while i < n { + let code: Int = str_char_code(name, i) + // a-z: 97-122, A-Z: 65-90, 0-9: 48-57 — keep; everything else -> '_' + if code >= 97 { + if code <= 122 { + let out = out + str_char_at(name, i) + } else { + let out = out + "_" + } + } else { + if code >= 65 { + if code <= 90 { + let out = out + str_char_at(name, i) + } else { + if code >= 48 { + if code <= 57 { + let out = out + str_char_at(name, i) + } else { + let out = out + "_" + } + } else { + let out = out + "_" + } + } + } else { + if code >= 48 { + if code <= 57 { + let out = out + str_char_at(name, i) + } else { + let out = out + "_" + } + } else { + let out = out + "_" + } + } + } + let i = i + 1 + } + out +} + // -- Type mapping -------------------------------------------------------------- fn el_type_to_c(type_str: String) -> String { @@ -205,40 +291,42 @@ fn next_html_id() -> String { // We build them all into 
parts, then the caller wraps with concat chain. fn cg_html_parts(children: [Map], acc_var: String) -> String { + // Accumulate fragments into a list to avoid O(n²) string growth. + // Each append is O(1); the single str_join at the end is O(total_size). let n: Int = native_list_len(children) let i = 0 - let out = "" + let parts: [String] = native_list_empty() while i < n { let child: Map = native_list_get(children, i) let html_kind: String = child["html"] if str_eq(html_kind, "Text") { let text: String = child["text"] - let out = out + acc_var + " = el_str_concat(" + acc_var + ", EL_STR(" + c_str_lit(text) + ")); " + let parts = native_list_append(parts, acc_var + " = el_str_concat(" + acc_var + ", EL_STR(" + c_str_lit(text) + ")); ") } if str_eq(html_kind, "Doctype") { - let out = out + acc_var + " = el_str_concat(" + acc_var + ", EL_STR(\"\")); " + let parts = native_list_append(parts, acc_var + " = el_str_concat(" + acc_var + ", EL_STR(\"\")); ") } if str_eq(html_kind, "Interp") { let val_node = child["value"] let val_c: String = cg_expr(val_node) - let out = out + acc_var + " = el_str_concat(" + acc_var + ", html_escape(" + val_c + ")); " + let parts = native_list_append(parts, acc_var + " = el_str_concat(" + acc_var + ", html_escape(" + val_c + ")); ") } if str_eq(html_kind, "Raw") { let val_node = child["value"] let val_c: String = cg_expr(val_node) - let out = out + acc_var + " = el_str_concat(" + acc_var + ", html_raw(" + val_c + ")); " + let parts = native_list_append(parts, acc_var + " = el_str_concat(" + acc_var + ", html_raw(" + val_c + ")); ") } if str_eq(html_kind, "Element") { let elem_c: String = cg_html_element_str(child, acc_var) - let out = out + elem_c + let parts = native_list_append(parts, elem_c) } if str_eq(html_kind, "Each") { let each_c: String = cg_html_each(child, acc_var) - let out = out + each_c + let parts = native_list_append(parts, each_c) } let i = i + 1 } - out + str_join(parts, "") } // Generate open-tag attribute fragments 
inline. @@ -247,9 +335,10 @@ fn cg_html_parts(children: [Map], acc_var: String) -> String { // Dynamic: "value" is an expr node. // Bool: no "value" field. fn cg_html_attrs_str(attrs: [Map], acc_var: String) -> String { + // Accumulate fragments into a list to avoid O(n²) string growth. let n: Int = native_list_len(attrs) let i = 0 - let out = "" + let parts: [String] = native_list_empty() // Closing-quote snippet: EL_STR("\"") in C text. let close_q: String = "EL_STR(" + c_str_lit("\"") + ")" while i < n { @@ -262,26 +351,26 @@ fn cg_html_attrs_str(attrs: [Map], acc_var: String) -> String { if str_eq(kind, "static") { // Static attribute: value is a raw string. let sv: String = attr["value"] - let out = out + acc_var + " = el_str_concat(" + acc_var + ", " + open_attr + "); " - let out = out + acc_var + " = el_str_concat(" + acc_var + ", EL_STR(" + c_str_lit(sv) + ")); " - let out = out + acc_var + " = el_str_concat(" + acc_var + ", " + close_q + "); " + let parts = native_list_append(parts, acc_var + " = el_str_concat(" + acc_var + ", " + open_attr + "); ") + let parts = native_list_append(parts, acc_var + " = el_str_concat(" + acc_var + ", EL_STR(" + c_str_lit(sv) + ")); ") + let parts = native_list_append(parts, acc_var + " = el_str_concat(" + acc_var + ", " + close_q + "); ") } else { if str_eq(kind, "dynamic") { // Dynamic attribute: value is an expr node — html_escape it. 
let val_node = attr["value"] let val_c: String = cg_expr(val_node) - let out = out + acc_var + " = el_str_concat(" + acc_var + ", " + open_attr + "); " - let out = out + acc_var + " = el_str_concat(" + acc_var + ", html_escape(" + val_c + ")); " - let out = out + acc_var + " = el_str_concat(" + acc_var + ", " + close_q + "); " + let parts = native_list_append(parts, acc_var + " = el_str_concat(" + acc_var + ", " + open_attr + "); ") + let parts = native_list_append(parts, acc_var + " = el_str_concat(" + acc_var + ", html_escape(" + val_c + ")); ") + let parts = native_list_append(parts, acc_var + " = el_str_concat(" + acc_var + ", " + close_q + "); ") } else { // Boolean attribute (no value): emit " name" let bool_attr: String = "EL_STR(" + c_str_lit(" " + attr_name) + ")" - let out = out + acc_var + " = el_str_concat(" + acc_var + ", " + bool_attr + "); " + let parts = native_list_append(parts, acc_var + " = el_str_concat(" + acc_var + ", " + bool_attr + "); ") } } let i = i + 1 } - out + str_join(parts, "") } // Generate code for a single element, appending into acc_var. @@ -290,19 +379,21 @@ fn cg_html_element_str(elem: Map, acc_var: String) -> String { let attrs: [Map] = elem["attrs"] let children: [Map] = elem["children"] let self_closing: Bool = elem["self_closing"] + // Accumulate into a list to avoid O(n²) string growth for deeply nested trees. 
+ let parts: [String] = native_list_empty() // Open tag: - let out = out + acc_var + " = el_str_concat(" + acc_var + ", EL_STR(\"/>\")); " + let parts = native_list_append(parts, acc_var + " = el_str_concat(" + acc_var + ", EL_STR(\"/>\")); ") } else { // Close open tag: > - let out = out + acc_var + " = el_str_concat(" + acc_var + ", EL_STR(\">\")); " - let out = out + cg_html_parts(children, acc_var) - let out = out + acc_var + " = el_str_concat(" + acc_var + ", EL_STR(\"\")); " + let parts = native_list_append(parts, acc_var + " = el_str_concat(" + acc_var + ", EL_STR(\">\")); ") + let parts = native_list_append(parts, cg_html_parts(children, acc_var)) + let parts = native_list_append(parts, acc_var + " = el_str_concat(" + acc_var + ", EL_STR(\"\")); ") } - out + str_join(parts, "") } // Generate code for {#each list as item} ... {/each}. @@ -393,6 +484,14 @@ fn cg_expr(expr: Map) -> String { if kind == "Neg" { let inner = expr["inner"] + let inner_kind: String = inner["expr"] + // Float literal negation: emit el_from_float(-n) so the IEEE 754 sign + // bit is set correctly. Arithmetic negation of the int64 bit pattern + // (the el_val_t representation) produces garbage, not -f. + if str_eq(inner_kind, "Float") { + let fval: String = inner["value"] + return "el_from_float(-" + fval + ")" + } let inner_c: String = cg_expr(inner) return "(-" + inner_c + ")" } @@ -718,6 +817,14 @@ fn cg_expr(expr: Map) -> String { return "(" + left_c + " == " + right_c + ")" } } + // Float literal or negative float literal: use plain == (bit-equal + // el_val_t comparison). This handles `r0 == 3.0`, `neg == -3.0`, etc. 
+ if is_float_expr(left) { + return "(" + left_c + " == " + right_c + ")" + } + if is_float_expr(right) { + return "(" + left_c + " == " + right_c + ")" + } if left_kind == "Str" { return "str_eq(" + left_c + ", " + right_c + ")" } @@ -769,6 +876,13 @@ fn cg_expr(expr: Map) -> String { return "(" + left_c + " != " + right_c + ")" } } + // Float-typed operands use plain != (bit-equal comparison). + if is_float_expr(left) { + return "(" + left_c + " != " + right_c + ")" + } + if is_float_expr(right) { + return "(" + left_c + " != " + right_c + ")" + } if left_kind == "Str" { return "!str_eq(" + left_c + ", " + right_c + ")" } @@ -808,6 +922,8 @@ fn cg_expr(expr: Map) -> String { let i = i + 1 } let args_c: String = str_join(args_parts, ", ") + // args_parts list fully consumed — release to free peak heap. + el_release(args_parts) if func_kind == "Ident" { let fn_name: String = func["name"] @@ -912,7 +1028,10 @@ fn cg_expr(expr: Map) -> String { let items_parts = native_list_append(items_parts, elem_c) let i = i + 1 } - return "el_list_new(" + native_int_to_str(n) + ", " + str_join(items_parts, ", ") + ")" + let items_joined: String = str_join(items_parts, ", ") + // items_parts fully consumed — release to free peak heap. + el_release(items_parts) + return "el_list_new(" + native_int_to_str(n) + ", " + items_joined + ")" } if kind == "Map" { @@ -933,7 +1052,10 @@ fn cg_expr(expr: Map) -> String { let items_parts = native_list_append(items_parts, c_str_lit(key) + ", " + val_c) let i = i + 1 } - return "el_map_new(" + native_int_to_str(n) + ", " + str_join(items_parts, ", ") + ")" + let items_joined: String = str_join(items_parts, ", ") + // items_parts fully consumed — release to free peak heap. 
+ el_release(items_parts) + return "el_map_new(" + native_int_to_str(n) + ", " + items_joined + ")" } if kind == "Try" { @@ -1036,7 +1158,10 @@ fn cg_match(expr: Map) -> String { let i = i + 1 } let parts = native_list_append(parts, done_label + ":; " + result_var + "; })") - str_join(parts, "") + let result: String = str_join(parts, "") + // parts list fully consumed — release to free peak heap. + el_release(parts) + result } // Lower a match statement (used for side effects, not as an expression) to a @@ -1205,7 +1330,10 @@ fn cg_if_expr_arm(stmts: [Map], result_var: String) -> String { } let i = i + 1 } - str_join(parts, "") + let result: String = str_join(parts, "") + // parts list fully consumed — release to free peak heap. + el_release(parts) + result } fn cg_if_expr(expr: Map) -> String { @@ -1450,6 +1578,26 @@ fn cg_stmt(stmt: Map, indent: String, declared: [String]) -> [Strin cg_stmts(try_body, indent, native_list_clone(declared)) return declared } + + // assert , — test harness assertion + if kind == "Assert" { + let cond_node = stmt["cond"] + let msg_node = stmt["msg"] + let c_cond: String = cg_expr(cond_node) + let c_msg: String = "" + let msg_kind: String = msg_node["expr"] + if str_eq(msg_kind, "Str") { + let raw_msg: String = msg_node["value"] + let c_msg = "\"" + c_escape(raw_msg) + "\"" + } else { + let c_msg = "EL_STR_PTR(" + cg_expr(msg_node) + ")" + } + emit_line(indent + "if (!(" + c_cond + ")) {") + emit_line(indent + " __el_test_fail(__el_cur_test, " + c_msg + "); __el_fail++;") + emit_line(indent + "} else { __el_pass++; }") + return declared + } + declared } @@ -1541,7 +1689,11 @@ fn cg_stmts(stmts: [Map], indent: String, declared: [String]) -> [S let decl = declared while i < n { let stmt = native_list_get(stmts, i) + // Per-statement arena scope: free intermediate strings (str_concat + // fragments, cg_expr results) after each statement is emitted. 
+ let s_mark: Any = el_arena_push() let decl = cg_stmt(stmt, indent, decl) + el_arena_pop(s_mark) let i = i + 1 } decl @@ -1565,7 +1717,10 @@ fn params_to_c(params: [Map]) -> String { let parts = native_list_append(parts, decl) let i = i + 1 } - str_join(parts, ", ") + let result: String = str_join(parts, ", ") + // parts list fully consumed — release to free peak heap. + el_release(parts) + result } // Transform a function body so that an implicit-return final expression @@ -2085,6 +2240,19 @@ fn is_int_expr(expr: Map) -> Bool { return false } +// is_float_expr — true when expr is (or evaluates to) a Float-typed value. +// Used in EqEq/NotEq codegen to avoid str_eq on float values. +fn is_float_expr(expr: Map) -> Bool { + let k: String = expr["expr"] + if str_eq(k, "Float") { return true } + if str_eq(k, "Neg") { + let inner = expr["inner"] + let ik: String = inner["expr"] + if str_eq(ik, "Float") { return true } + } + false +} + // -- Capability-kind enforcement ---------------------------------------------- // // A program's top-level block (cgi / service / none) determines which @@ -2534,6 +2702,9 @@ fn builtin_arity(name: String) -> Int { if str_eq(name, "__channel_recv") { return 1 } if str_eq(name, "__channel_try_recv") { return 1 } if str_eq(name, "__channel_close") { return 1 } + // Arena mark/restore builtins + if str_eq(name, "el_arena_push") { return 0 } + if str_eq(name, "el_arena_pop") { return 1 } // -1 sentinel: variadic / unknown / user-defined -> no check. 
return -1 } @@ -2770,7 +2941,8 @@ fn cg_fn(stmt: Map) -> Void { if !str_eq(ret_type, "Void") { let body_xformed = transform_implicit_return(body) } - cg_stmts(body_xformed, " ", decl) + let final_decl = cg_stmts(body_xformed, " ", decl) + el_release(final_decl) emit_line(" return 0;") emit_line("}") emit_blank() @@ -3244,3 +3416,477 @@ fn codegen(stmts: [Map], source: String) -> String { // Return empty string - output was streamed via println "" } + +// ── Streaming codegen (JIT function-at-a-time) ───────────────────────────── +// +// codegen_streaming is a memory-efficient alternative to codegen(). +// Instead of receiving the full parsed AST, it receives the raw token list +// and a pre-scanned signature list (from scan_fn_sigs in parser.el). +// +// Pipeline: +// 1. Scan phase (already done by caller): scan_fn_sigs(tokens) -> sigs +// 2. Emit preamble using sigs (no full AST needed) +// 3. For each top-level statement: +// parse_one(tokens, pos) -> { node, pos } +// cg_decl_streaming(node) <- emit C for this one decl +// el_release(node) <- discard AST immediately +// +// Peak memory: O(one function's AST) instead of O(whole program AST). +// +// Entry point: codegen_streaming(tokens, sigs, source) -> String + +// cg_decl_streaming — emit C for a single top-level declaration. +// Handles FnDef, ExternFn, TypeDef, EnumDef, Import, CgiBlock, ServiceBlock. +// Top-level Let statements go into the main() body, not here. +// Top-level executable statements (non-fn, non-let, non-decl) are +// accumulated into state and emitted later in main(). +fn cg_decl_streaming(stmt: Map) -> Void { + let sk: String = stmt["stmt"] + if str_eq(sk, "FnDef") { + cg_fn(stmt) + return + } + // All other top-level decl kinds are either no-ops (Import, TypeDef, + // EnumDef, ExternFn forward decl already emitted) or capability markers + // (CgiBlock, ServiceBlock already handled in preamble). + // Top-level Lets are also no-ops here (file-scope slots already emitted). 
+ // Executable top-level stmts (Expr, Return, etc.) are accumulated in state. + if !str_eq(sk, "FnDef") { + if !is_top_level_decl(stmt) { + if !str_eq(sk, "Let") { + // This is an executable top-level statement. + // We can't emit it into main() yet because we haven't started + // emitting main(). Accumulate in state as a list index. + // We'll collect these into a list and emit after all fns. + state_set("__streaming_has_toplevel_stmts", "1") + } + } + } +} + +// emit_streaming_preamble — emit #includes, forward decls, and file-scope lets +// using the pre-scanned signature data (no full AST). +fn emit_streaming_preamble(sigs: [Map], source: String) -> Void { + let n: Int = native_list_len(sigs) + + // Detect program kind from sigs + let cgi_count: Int = 0 + let svc_count: Int = 0 + let i: Int = 0 + while i < n { + let sig = native_list_get(sigs, i) + let sk: String = sig["kind"] + if str_eq(sk, "cgi_block") { let cgi_count = cgi_count + 1 } + if str_eq(sk, "service_block") { let svc_count = svc_count + 1 } + let i = i + 1 + } + if cgi_count > 1 { + emit_line("#error \"El: multiple cgi blocks in program (only one allowed)\"") + } + if svc_count > 1 { + emit_line("#error \"El: multiple service blocks in program (only one allowed)\"") + } + if cgi_count >= 1 { + if svc_count >= 1 { + emit_line("#error \"El: program declares both cgi and service blocks (mutually exclusive - pick one)\"") + } + } + let kind: String = "utility" + if cgi_count >= 1 { let kind = "cgi" } + if svc_count >= 1 { let kind = "service" } + state_set("__program_kind", kind) + state_set("__cap_violations", "") + state_set("__arity_violations", "") + state_set("__time_violations", "") + + emit_line("#include ") + emit_line("#include ") + emit_line("#include \"el_runtime.h\"") + emit_blank() + + // Forward declarations — use pre-computed params_c strings from scan. 
+ let i = 0 + while i < n { + let sig = native_list_get(sigs, i) + let sk: String = sig["kind"] + if str_eq(sk, "fn") { + let fn_name: String = sig["name"] + if !str_eq(fn_name, "main") { + let params_c: String = sig["params_c"] + emit_line("el_val_t " + fn_name + "(" + params_c + ");") + } + } + if str_eq(sk, "extern_fn") { + let fn_name: String = sig["name"] + let params_c: String = sig["params_c"] + emit_line("el_val_t " + fn_name + "(" + params_c + ");") + } + let i = i + 1 + } + emit_blank() + + // File-scope let slots + let has_toplevel_lets: Bool = false + let i = 0 + while i < n { + let sig = native_list_get(sigs, i) + let sk: String = sig["kind"] + if str_eq(sk, "toplevel_let") { + let name: String = sig["name"] + let ltype: String = sig["ltype"] + if str_eq(ltype, "Int") { add_int_name(name) } + emit_line("el_val_t " + name + ";") + let has_toplevel_lets = true + } + let i = i + 1 + } + if has_toplevel_lets { emit_blank() } +} + +// codegen_streaming — JIT function-at-a-time compiler backend. +// tokens: flat token list from lex() +// sigs: pre-scanned signature list from scan_fn_sigs(tokens) +// source: original source string (for string literal lookup) +fn codegen_streaming(tokens: [Any], sigs: [Map], source: String) -> String { + let total_tokens: Int = native_list_len(tokens) / 2 + + // Emit preamble (forward decls, file-scope lets, #includes) + // Arena scope: free intermediate strings built during preamble emission. + let preamble_mark: Any = el_arena_push() + emit_streaming_preamble(sigs, source) + el_arena_pop(preamble_mark) + + // Detect whether there is a fn main() and whether there are top-level + // executable stmts (for library detection) from sigs. 
+ let has_el_main: Bool = false + let ns: Int = native_list_len(sigs) + let si: Int = 0 + while si < ns { + let sig = native_list_get(sigs, si) + let sk2: String = sig["kind"] + if str_eq(sk2, "fn") { + let fn_name_chk: String = sig["name"] + if str_eq(fn_name_chk, "main") { let has_el_main = true } + } + let si = si + 1 + } + + // Collect top-level let names for seeding main()'s declared set. + let toplevel_let_names: [String] = native_list_empty() + let si = 0 + while si < ns { + let sig = native_list_get(sigs, si) + let sk2: String = sig["kind"] + if str_eq(sk2, "toplevel_let") { + let tname: String = sig["name"] + let toplevel_let_names = native_list_append(toplevel_let_names, tname) + } + let si = si + 1 + } + + // In test mode: collect test function names for harness main(). + let test_is_mode: Bool = false + let tmode_str: String = state_get("__test_mode") + if str_eq(tmode_str, "1") { let test_is_mode = true } + let test_names: [String] = native_list_empty() + let test_c_names: [String] = native_list_empty() + + // Emit test harness preamble (counters, fail printer) when in test mode. + if test_is_mode { + emit_line("#include ") + emit_blank() + emit_line("static int __el_pass = 0, __el_fail = 0;") + emit_line("static const char *__el_cur_test = \"(none)\";") + emit_line("static void __el_test_fail(const char *test, const char *msg) {") + emit_line(" fprintf(stderr, \"FAIL %-40s %s\\n\", test, msg);") + emit_line("}") + emit_blank() + } + + // Streaming parse-emit loop. 
+ // For each parsed stmt: + // - FnDef (not main): emit immediately via cg_fn, release AST + // - Others: accumulate only fn-main body and top-level executable stmts + // (these are small in count relative to fn bodies) + let pos: Int = 0 + let el_main_body: [Map] = native_list_empty() + let toplevel_exec_stmts: [Map] = native_list_empty() + let has_toplevel_exec: Bool = false + + let stream_running: Bool = true + while stream_running { + if pos >= total_tokens { + let stream_running = false + } else { + let k: String = tok_kind(tokens, pos) + if str_eq(k, "Eof") { + let stream_running = false + } else { + if str_eq(k, "Test") { + if test_is_mode { + // Compile test "name" { ... } block into a static void __el_test_NAME() function. + let p: Int = pos + 1 + let test_name: String = "unnamed" + if str_eq(tok_kind(tokens, p), "Str") { + let test_name = tok_value(tokens, p) + let p = p + 1 + } + let fn_c_name: String = "__el_test_" + sanitize_test_name(test_name) + let test_names = native_list_append(test_names, test_name) + let test_c_names = native_list_append(test_c_names, fn_c_name) + // Emit the test function header. + emit_line("static void " + fn_c_name + "(void) {") + emit_line(" __el_cur_test = \"" + c_escape(test_name) + "\";") + // Skip the opening LBrace and parse body statements. + if str_eq(tok_kind(tokens, p), "LBrace") { let p = p + 1 } + let body_decl: [String] = native_list_empty() + let body_done: Bool = false + while !body_done { + let bk: String = tok_kind(tokens, p) + if str_eq(bk, "RBrace") { + let body_done = true + } else { + if str_eq(bk, "Eof") { + let body_done = true + } else { + let br = parse_one(tokens, p) + let bstmt = br["node"] + let np: Int = br["pos"] + el_release(br) + if np > p { + let body_arena: Any = el_arena_push() + let body_decl = cg_stmt(bstmt, " ", body_decl) + el_arena_pop(body_arena) + el_release(bstmt) + let p = np + } else { + let p = p + 1 + } + } + } + } + // Skip past closing RBrace. 
+ if str_eq(tok_kind(tokens, p), "RBrace") { let p = p + 1 } + el_release(body_decl) + emit_line("}") + emit_blank() + let pos = p + } else { + // Non-test mode: skip test blocks entirely to avoid OOM. + // Without this skip, the body `{ ... }` would be parsed as a Map + // literal, building a huge AST with O(n²) string allocation. + let p: Int = pos + 1 + let k_name: String = tok_kind(tokens, p) + if str_eq(k_name, "Str") { let p = p + 1 } + let k_body: String = tok_kind(tokens, p) + if str_eq(k_body, "LBrace") { let p = skip_to_rbrace(tokens, p) } + let pos = p + } + } else { + let r = parse_one(tokens, pos) + let stmt = r["node"] + let new_pos: Int = r["pos"] + el_release(r) + + // Guard against infinite loops + if new_pos <= pos { + el_release(stmt) + let pos = pos + 1 + } else { + let sk: String = stmt["stmt"] + + if str_eq(sk, "FnDef") { + let fn_name2: String = stmt["name"] + if str_eq(fn_name2, "main") { + // Capture main() body for later + let body = stmt["body"] + let bn: Int = native_list_len(body) + let bi: Int = 0 + while bi < bn { + let el_main_body = native_list_append(el_main_body, native_list_get(body, bi)) + let bi = bi + 1 + } + el_release(stmt) + } else { + // Emit immediately — this is the JIT core + // Arena scope: free all intermediate strings (str_concat, + // int_to_str, cg_expr fragments) after each function. + let fn_arena_mark: Any = el_arena_push() + cg_fn(stmt) + el_release(stmt) + el_arena_pop(fn_arena_mark) + } + } else { + if is_top_level_decl(stmt) { + // Import, TypeDef, EnumDef, CgiBlock, ServiceBlock, ExternFn + // These are no-ops in codegen (forward decls already emitted) + el_release(stmt) + } else { + if str_eq(sk, "Let") { + // Top-level let: file-scope slot already declared. + // Keep for main() init — these are few and small. 
+ let toplevel_exec_stmts = native_list_append(toplevel_exec_stmts, stmt) + let has_toplevel_exec = true + } else { + // Executable top-level stmt (rare) + let toplevel_exec_stmts = native_list_append(toplevel_exec_stmts, stmt) + let has_toplevel_exec = true + } + } + } + + let pos = new_pos + } + } + } + } + } + + // Tokens fully consumed by the streaming loop — release now to free peak heap. + el_release(tokens) + + if test_is_mode { + // Test mode: emit test harness main() that calls each collected test function. + // Discard El's main body and top-level exec stmts (not needed in test harness). + el_release(el_main_body) + el_release(toplevel_exec_stmts) + el_release(toplevel_let_names) + el_release(sigs) + + let test_arena_mark: Any = el_arena_push() + emit_line("int main(int _argc, char **_argv) {") + emit_line(" el_runtime_init_args(_argc, _argv);") + let ti: Int = 0 + let tn: Int = native_list_len(test_c_names) + while ti < tn { + let tc_name: String = native_list_get(test_c_names, ti) + emit_line(" " + tc_name + "();") + let ti = ti + 1 + } + emit_line(" printf(\"%d passed, %d failed\\n\", __el_pass, __el_fail);") + emit_line(" return __el_fail;") + emit_line("}") + el_arena_pop(test_arena_mark) + el_release(test_names) + el_release(test_c_names) + return "" + } + + // Release test tracking lists (empty in non-test mode). + el_release(test_names) + el_release(test_c_names) + + // Library detection: no fn main and no top-level executable stmts + let is_library: Bool = false + if !has_el_main { + if !has_toplevel_exec { + let is_library = true + } + } + if is_library { return "" } + + // Emit main() — wrap in arena scope to free intermediate strings. 
+ let main_arena_mark: Any = el_arena_push() + let kind2: String = state_get("__program_kind") + emit_line("int main(int _argc, char** _argv) {") + emit_line(" el_runtime_init_args(_argc, _argv);") + + // cgi init if needed + let ns2: Int = native_list_len(sigs) + let si2: Int = 0 + while si2 < ns2 { + let sig2 = native_list_get(sigs, si2) + let sk3: String = sig2["kind"] + if str_eq(sk3, "cgi_block") { + // We need the full cgi_block data — it was parsed by scan_fn_sigs + // but scan only stored the name. For cgi_init we need dharma_id etc. + // Since cgi blocks are rare and small, they end up in toplevel_exec_stmts. + // Find the CgiBlock in toplevel_exec_stmts. + let tes_n: Int = native_list_len(toplevel_exec_stmts) + let tes_i: Int = 0 + while tes_i < tes_n { + let tes = native_list_get(toplevel_exec_stmts, tes_i) + let tes_k: String = tes["stmt"] + if str_eq(tes_k, "CgiBlock") { + let cname2: String = tes["name"] + let cdid2: String = tes["dharma_id"] + let cprin2: String = tes["principal"] + let cnet2: String = tes["network"] + let ceng2: String = tes["engram"] + let has_did2: Bool = tes["has_dharma_id"] + let has_prin2: Bool = tes["has_principal"] + let has_net2: Bool = tes["has_network"] + let has_eng2: Bool = tes["has_engram"] + let arg_name2: String = "EL_STR(" + c_str_lit(cname2) + ")" + let arg_did2: String = cgi_arg(cdid2, has_did2) + let arg_prin2: String = cgi_arg(cprin2, has_prin2) + let arg_net2: String = cgi_arg(cnet2, has_net2) + let arg_eng2: String = cgi_arg(ceng2, has_eng2) + emit_line(" el_cgi_init(" + arg_name2 + ", " + arg_did2 + ", " + arg_prin2 + ", " + arg_net2 + ", " + arg_eng2 + ");") + } + let tes_i = tes_i + 1 + } + } + let si2 = si2 + 1 + } + + // sigs fully consumed — release to free peak heap. 
+ el_release(sigs) + + // Seed declared set with top-level let names + let main_decl2: [String] = native_list_empty() + let tln: Int = native_list_len(toplevel_let_names) + let tli: Int = 0 + while tli < tln { + let main_decl2 = native_list_append(main_decl2, native_list_get(toplevel_let_names, tli)) + let tli = tli + 1 + } + // toplevel_let_names fully consumed — release to free peak heap. + el_release(toplevel_let_names) + + // Emit top-level executable stmts (lets and others) into main() + // Per-statement arena scope mirrors el_main_body: frees intermediate strings + // (str_concat fragments from cg_expr) after each statement, preventing O(n²) + // accumulation when many stmts are present (e.g. from unrecognized constructs). + let tes_n2: Int = native_list_len(toplevel_exec_stmts) + let tes_i2: Int = 0 + while tes_i2 < tes_n2 { + let tes2 = native_list_get(toplevel_exec_stmts, tes_i2) + let tes_k2: String = tes2["stmt"] + if !str_eq(tes_k2, "CgiBlock") { + if !str_eq(tes_k2, "ServiceBlock") { + let tes_mark: Any = el_arena_push() + let main_decl2 = cg_stmt(tes2, " ", main_decl2) + el_arena_pop(tes_mark) + } + } + let tes_i2 = tes_i2 + 1 + } + // toplevel_exec_stmts fully consumed — release to free peak heap. + el_release(toplevel_exec_stmts) + + // Emit fn main() body — per-statement arena scope frees intermediate strings. + let mn: Int = native_list_len(el_main_body) + let mi: Int = 0 + while mi < mn { + let mstmt = native_list_get(el_main_body, mi) + let stmt_mark: Any = el_arena_push() + let main_decl2 = cg_stmt(mstmt, " ", main_decl2) + el_arena_pop(stmt_mark) + let mi = mi + 1 + } + // el_main_body and main_decl2 fully consumed — release to free peak heap. 
+ el_release(el_main_body) + el_release(main_decl2) + + emit_line(" return 0;") + emit_line("}") + emit_blank() + + emit_cap_violations() + emit_arity_violations() + emit_time_violations() + el_arena_pop(main_arena_mark) + + "" +} diff --git a/lang/el-compiler/src/compiler.el b/lang/el-compiler/src/compiler.el index e74b38a..a4f5fdc 100644 --- a/lang/el-compiler/src/compiler.el +++ b/lang/el-compiler/src/compiler.el @@ -20,18 +20,44 @@ import "codegen.el" import "codegen-js.el" // compile — full pipeline (C target): source string -> C source string +// Uses JIT function-at-a-time streaming: parse one decl → emit C → discard AST. +// Peak memory is O(one function's AST) instead of O(whole program AST). fn compile(source: String) -> String { - let tokens: [Map] = lex(source) - let stmts: [Map] = parse(tokens) - // Token list is no longer needed after parsing — release it to free memory - // before codegen allocates its own working data on large source files. - el_release(tokens) - codegen(stmts, source) + // Top-level arena scope: activates the string arena before lex() so that + // ALL strdup allocations (token strings, sig strings, codegen fragments) + // are tracked and freed on pop. Without this, lex() and scan_fn_sigs() + // run before any push, leaving _tl_arena_active=0 and leaking every + // token string. Also prevents inner pop(mark=0) calls from deactivating + // the arena between per-function scopes. + let top_mark: Any = el_arena_push() + let tokens: [Any] = lex(source) + // Fast pre-scan: collect fn signatures + program kind without building + // full expression ASTs. O(tokens) time, minimal allocation. + let sigs: [Map] = scan_fn_sigs(tokens) + // Stream parse-emit: parse one decl at a time, emit C, discard. + // All output written to stdout via println before pop. 
+ codegen_streaming(tokens, sigs, source) + el_arena_pop(top_mark) + "" +} + +// compile_test — like compile() but sets __test_mode so codegen_streaming +// compiles test { } blocks instead of skipping them, and emits the test +// harness main() instead of the normal int main(). +fn compile_test(source: String) -> String { + state_set("__test_mode", "1") + let top_mark: Any = el_arena_push() + let tokens: [Any] = lex(source) + let sigs: [Map] = scan_fn_sigs(tokens) + codegen_streaming(tokens, sigs, source) + el_arena_pop(top_mark) + state_set("__test_mode", "") + "" } // compile_js — full pipeline (JS target, module mode): source string -> JS source string fn compile_js(source: String) -> String { - let tokens: [Map] = lex(source) + let tokens: [Any] = lex(source) let stmts: [Map] = parse(tokens) // Token list is no longer needed after parsing — release it to free memory. el_release(tokens) @@ -41,7 +67,7 @@ fn compile_js(source: String) -> String { // compile_js_with_bundle — JS target in bundle mode. // Reads el_runtime.js from runtime_path and inlines it inside an IIFE. fn compile_js_with_bundle(source: String, runtime_path: String) -> String { - let tokens: [Map] = lex(source) + let tokens: [Any] = lex(source) let stmts: [Map] = parse(tokens) el_release(tokens) let runtime_content: String = fs_read(runtime_path) @@ -147,6 +173,18 @@ fn detect_obfuscate(argv: [String]) -> Bool { return false } +// Detect --test flag in argv. +fn detect_test(argv: [String]) -> Bool { + let n: Int = native_list_len(argv) + let i = 0 + while i < n { + let a: String = native_list_get(argv, i) + if str_eq(a, "--test") { return true } + let i = i + 1 + } + return false +} + // Build a unique temp file path: /tmp/elc--. 
fn make_temp_path(suffix: String) -> String { let pid: Int = getpid_now() @@ -476,6 +514,7 @@ fn main() -> Void { let do_bundle: Bool = detect_bundle(argv) let do_minify: Bool = detect_minify(argv) let do_obfuscate: Bool = detect_obfuscate(argv) + let do_test: Bool = detect_test(argv) // --obfuscate implies --minify: obfuscating unminified code is pointless. if do_obfuscate { let do_minify = true @@ -483,7 +522,7 @@ fn main() -> Void { let positional: [String] = strip_flags(argv) let argc: Int = native_list_len(positional) if argc < 1 { - println("el-compiler: usage: elc [--target=c|js] [--bundle] [--minify] [--obfuscate] [--emit-header] []") + println("el-compiler: usage: elc [--target=c|js] [--bundle] [--minify] [--obfuscate] [--emit-header] [--test] []") exit(1) } @@ -501,7 +540,7 @@ fn main() -> Void { // (without inlining imports) and write out a .elh file alongside the .c. if do_emit_header { let raw_source: String = fs_read(src_path) - let hdr_tokens: [Map] = lex(raw_source) + let hdr_tokens: [Any] = lex(raw_source) let hdr_stmts: [Map] = parse(hdr_tokens) el_release(hdr_tokens) let hdr_path: String = str_slice(src_path, 0, str_len(src_path) - 3) + ".elh" @@ -520,6 +559,12 @@ fn main() -> Void { exit(0) } + // --test mode: compile with test harness (C target only). + if do_test { + compile_test(source) + exit(0) + } + // Standard path (no post-processing). let out: String = "" if do_bundle { diff --git a/lang/el-compiler/src/lexer.el b/lang/el-compiler/src/lexer.el index 504c9c6..48cf5fe 100644 --- a/lang/el-compiler/src/lexer.el +++ b/lang/el-compiler/src/lexer.el @@ -7,11 +7,50 @@ // // Entry point: fn lex(source: String) -> [Map] // -// Uses native_string_chars to split the source into a chars list, -// then indexes it with native_list_get - avoids O(N-) string cloning. +// Performance: the hot lexer loop uses str_char_code (returns Int) instead of +// str_char_at (returns strdup'd String) for character classification. 
+// For a 400KB source, str_char_at allocates ~400K × 16B = ~6.4MB of temporary +// strings for the `ch` variable alone. str_char_code avoids all that. -// -- Character helpers --------------------------------------------------------- +// -- Character helpers (Int-based, no string allocation) ---------------------- +// These operate on char codes (from str_char_code) instead of str_char_at, +// eliminating one strdup per character in the hot lexer loop. +fn is_digit_code(c: Int) -> Bool { + // '0'=48 .. '9'=57 + if c >= 48 { + if c <= 57 { return true } + } + false +} + +fn is_alpha_code(c: Int) -> Bool { + // 'A'=65..'Z'=90, 'a'=97..'z'=122 + if c >= 65 { + if c <= 90 { return true } + } + if c >= 97 { + if c <= 122 { return true } + } + false +} + +fn is_alnum_or_underscore_code(c: Int) -> Bool { + if is_digit_code(c) { return true } + if is_alpha_code(c) { return true } + if c == 95 { return true } // '_' + false +} + +fn is_ws_code(c: Int) -> Bool { + if c == 32 { return true } // ' ' + if c == 9 { return true } // '\t' + if c == 10 { return true } // '\n' + if c == 13 { return true } // '\r' + false +} + +// Legacy String-based helpers kept for scan_interp helpers that use str_char_at. fn lex_is_digit(ch: String) -> Bool { if ch == "0" { return true } if ch == "1" { return true } @@ -97,8 +136,11 @@ fn lex_is_whitespace(ch: String) -> Bool { false } -fn make_tok(kind: String, value: String) -> Map { - { "kind": kind, "value": value } +// tok_append — append a (kind, value) pair to a flat token list. +// Returns the updated list. Gamma combines flat-list + char-code for max savings. 
+fn tok_append(tokens: [Any], kind: String, value: String) -> [Any] { + let tokens = native_list_append(tokens, kind) + native_list_append(tokens, value) } // -- Keyword lookup ------------------------------------------------------------ @@ -157,45 +199,43 @@ fn keyword_kind(word: String) -> String { // scan_digits - advance i while chars[i] is a digit // Returns { "text": ..., "pos": i } -fn scan_digits(chars: [String], start: Int, total: Int) -> Map { +fn scan_digits(src: String, start: Int, total: Int) -> Map { let i = start - let parts: [String] = native_list_empty() let running = true while running { if i >= total { let running = false } else { - let ch: String = native_list_get(chars, i) - if lex_is_digit(ch) { - let parts = native_list_append(parts, ch) + let c: Int = str_char_code(src, i) + if is_digit_code(c) { let i = i + 1 } else { let running = false } } } - { "text": str_join(parts, ""), "pos": i } + // Use str_slice instead of building a parts list — O(1) allocation, O(n) copy. + { "text": str_slice(src, start, i), "pos": i } } // scan_ident - advance i while chars[i] is alphanumeric or underscore -fn scan_ident(chars: [String], start: Int, total: Int) -> Map { +fn scan_ident(src: String, start: Int, total: Int) -> Map { let i = start - let parts: [String] = native_list_empty() let running = true while running { if i >= total { let running = false } else { - let ch: String = native_list_get(chars, i) - if is_alnum_or_underscore(ch) { - let parts = native_list_append(parts, ch) + let c: Int = str_char_code(src, i) + if is_alnum_or_underscore_code(c) { let i = i + 1 } else { let running = false } } } - { "text": str_join(parts, ""), "pos": i } + // Use str_slice instead of building a parts list — O(1) allocation, O(n) copy. 
+ { "text": str_slice(src, start, i), "pos": i } } // -- Code-bearing string detection + comment strip ---------------------------- @@ -208,34 +248,16 @@ fn scan_ident(chars: [String], start: Int, total: Int) -> Map { // looks_like_code - heuristic gate so we only strip strings that actually // embed JS or CSS. Plain prose, hex blobs, JSON, etc. pass through verbatim. -fn substr_at(chars: [String], start: Int, total: Int, needle: String) -> Bool { - let nchars: [String] = native_string_chars(needle) - let nlen: Int = native_list_len(nchars) +fn substr_at(src: String, start: Int, total: Int, needle: String) -> Bool { + let nlen: Int = str_len(needle) if start + nlen > total { return false } - let i = 0 - let matched = true - while i < nlen { - let a: String = native_list_get(chars, start + i) - let b: String = native_list_get(nchars, i) - if a == b { let i = i + 1 } else { let matched = false; let i = nlen } - } - matched + // Use str_slice comparison instead of char-by-char loop. + str_eq(str_slice(src, start, start + nlen), needle) } fn str_has(s: String, needle: String) -> Bool { - let chars: [String] = native_string_chars(s) - let total: Int = native_list_len(chars) - let i = 0 - let found = false - while i < total { - if substr_at(chars, i, total, needle) { - let found = true - let i = total - } else { - let i = i + 1 - } - } - found + // Use the built-in str_contains which is implemented in native C — O(n) single pass. + str_contains(s, needle) } fn looks_like_code(s: String) -> Bool { @@ -254,8 +276,7 @@ fn looks_like_code(s: String) -> Bool { // comment opener: if the char immediately before '/' is ':', emit the '/' // literally and advance one position. 
fn strip_code_comments(s: String) -> String { - let chars: [String] = native_string_chars(s) - let total: Int = native_list_len(chars) + let total: Int = str_len(s) let out_parts: [String] = native_list_empty() let i = 0 let in_squote = false @@ -263,7 +284,7 @@ fn strip_code_comments(s: String) -> String { let in_btick = false let prev = "" while i < total { - let ch: String = native_list_get(chars, i) + let ch: String = str_char_at(s, i) let in_js_string = false if in_squote { let in_js_string = true } if in_dquote { let in_js_string = true } @@ -275,7 +296,7 @@ fn strip_code_comments(s: String) -> String { let out_parts = native_list_append(out_parts, ch) let next_i = i + 1 if next_i < total { - let nc: String = native_list_get(chars, next_i) + let nc: String = str_char_at(s, next_i) let out_parts = native_list_append(out_parts, nc) let prev = nc let i = next_i + 1 @@ -304,7 +325,7 @@ fn strip_code_comments(s: String) -> String { let next_i = i + 1 let next_ch = "" if next_i < total { - let next_ch: String = native_list_get(chars, next_i) + let next_ch: String = str_char_at(s, next_i) } if ch == "/" { @@ -323,7 +344,7 @@ fn strip_code_comments(s: String) -> String { if i >= total { let scanning = false } else { - let lc: String = native_list_get(chars, i) + let lc: String = str_char_at(s, i) if lc == "\n" { let scanning = false } else { @@ -342,11 +363,11 @@ fn strip_code_comments(s: String) -> String { if i >= total { let scanning2 = false } else { - let bc: String = native_list_get(chars, i) + let bc: String = str_char_at(s, i) if bc == "*" { let after = i + 1 if after < total { - let nc2: String = native_list_get(chars, after) + let nc2: String = str_char_at(s, after) if nc2 == "/" { let i = after + 1 let scanning2 = false @@ -402,7 +423,7 @@ fn strip_code_comments(s: String) -> String { // scan_string - scan a quoted string literal, handling \" escapes. // Starts AFTER the opening quote. 
Returns { "text": content, "pos": i_after_close } -fn scan_string(chars: [String], start: Int, total: Int) -> Map { +fn scan_string(src: String, start: Int, total: Int) -> Map { let i = start let parts: [String] = native_list_empty() let running = true @@ -410,12 +431,12 @@ fn scan_string(chars: [String], start: Int, total: Int) -> Map { if i >= total { let running = false } else { - let ch: String = native_list_get(chars, i) + let ch: String = str_char_at(src, i) if ch == "\\" { // escape: peek next char let next_i = i + 1 if next_i < total { - let next_ch: String = native_list_get(chars, next_i) + let next_ch: String = str_char_at(src, next_i) if next_ch == "\"" { let parts = native_list_append(parts, "\"") let i = next_i + 1 @@ -465,19 +486,17 @@ fn scan_string(chars: [String], start: Int, total: Int) -> Map { // scan_interp_brace - scan from `start` (the char after `${`) to the matching // `}`, tracking brace depth so inner braces (e.g. fn calls, map literals) are // handled correctly. Returns { "text": inner_source, "pos": i_after_close }. -fn scan_interp_brace(chars: [String], start: Int, total: Int) -> Map { +fn scan_interp_brace(src: String, start: Int, total: Int) -> Map { let i = start - let parts: [String] = native_list_empty() let depth = 1 let running = true while running { if i >= total { let running = false } else { - let ch: String = native_list_get(chars, i) + let ch: String = str_char_at(src, i) if ch == "{" { let depth = depth + 1 - let parts = native_list_append(parts, ch) let i = i + 1 } else { if ch == "}" { @@ -487,33 +506,33 @@ fn scan_interp_brace(chars: [String], start: Int, total: Int) -> Map], src: [Map]) -> [Map] { +// interp_tokens_append_all - copy every (kind, value) pair from flat src list +// into flat dst list, skipping the trailing Eof pair that lex() always appends. 
+fn interp_tokens_append_all(dst: [Any], src: [Any]) -> [Any] { let src_len: Int = native_list_len(src) let j = 0 let result = dst while j < src_len { - let tok: Map = native_list_get(src, j) - let tk: String = tok["kind"] - if tk == "Eof" { + let kind: String = native_list_get(src, j) + if kind == "Eof" { let j = src_len } else { - let result = native_list_append(result, tok) - let j = j + 1 + let val: String = native_list_get(src, j + 1) + let result = native_list_append(result, kind) + let result = native_list_append(result, val) + let j = j + 2 } } result @@ -536,10 +555,17 @@ fn interp_tokens_append_all(dst: [Map], src: [Map]) -> // // Supported escape sequences: \" \n \t \r \\ \$ (literal dollar sign). // Nested quotes inside ${} are not supported; use a variable instead. -fn scan_interp_string(chars: [String], start: Int, total: Int) -> Map { +// +// Performance: uses str_char_code (Int) for all character dispatch, eliminating +// per-character strdup. Plain runs are batched into str_slice segments instead +// of accumulating single-char strings, reducing list appends from O(N) to O(K) +// where K = number of escape/special chars in the literal. 
+// Char codes: '\' = 92, '"' = 34, '$' = 36, '{' = 123 +fn scan_interp_string(src: String, start: Int, total: Int) -> Map { let i = start - let out_tokens: [Map] = native_list_empty() - let cur_part: [String] = native_list_empty() + let out_tokens: [Any] = native_list_empty() + let cur_parts: [String] = native_list_empty() + let clean_start = start let has_interp = false let need_plus = false let running = true @@ -548,39 +574,55 @@ fn scan_interp_string(chars: [String], start: Int, total: Int) -> Map= total { let running = false } else { - let ch: String = native_list_get(chars, i) + let c: Int = str_char_code(src, i) - if ch == "\\" { - // Escape sequence + if c == 92 { + // '\\' = 92 — escape sequence: flush clean run, append resolved char + if clean_start < i { + let cur_parts = native_list_append(cur_parts, str_slice(src, clean_start, i)) + } let next_i = i + 1 if next_i < total { - let next_ch: String = native_list_get(chars, next_i) - if next_ch == "$" { - // \$ => literal '$' (escape for interpolation syntax) - let cur_part = native_list_append(cur_part, "$") + let nc: Int = str_char_code(src, next_i) + if nc == 36 { + // '\$' => literal '$' (36 = '$') + let cur_parts = native_list_append(cur_parts, "$") + let clean_start = next_i + 1 let i = next_i + 1 } else { - if next_ch == "\"" { - let cur_part = native_list_append(cur_part, "\"") + if nc == 34 { + // '\"' => literal '"' (34 = '"') + let cur_parts = native_list_append(cur_parts, "\"") + let clean_start = next_i + 1 let i = next_i + 1 } else { - if next_ch == "n" { - let cur_part = native_list_append(cur_part, "\n") + if nc == 110 { + // '\n' (110 = 'n') + let cur_parts = native_list_append(cur_parts, "\n") + let clean_start = next_i + 1 let i = next_i + 1 } else { - if next_ch == "t" { - let cur_part = native_list_append(cur_part, "\t") + if nc == 116 { + // '\t' (116 = 't') + let cur_parts = native_list_append(cur_parts, "\t") + let clean_start = next_i + 1 let i = next_i + 1 } else { - if next_ch == 
"r" { - let cur_part = native_list_append(cur_part, "\r") + if nc == 114 { + // '\r' (114 = 'r') + let cur_parts = native_list_append(cur_parts, "\r") + let clean_start = next_i + 1 let i = next_i + 1 } else { - if next_ch == "\\" { - let cur_part = native_list_append(cur_part, "\\") + if nc == 92 { + // '\\' (92) + let cur_parts = native_list_append(cur_parts, "\\") + let clean_start = next_i + 1 let i = next_i + 1 } else { - let cur_part = native_list_append(cur_part, next_ch) + // Unknown escape: emit the escaped char verbatim + let cur_parts = native_list_append(cur_parts, str_slice(src, next_i, next_i + 1)) + let clean_start = next_i + 1 let i = next_i + 1 } } @@ -589,75 +631,85 @@ fn scan_interp_string(chars: [String], start: Int, total: Int) -> Map 0 { - let part_text = str_join(cur_part, "") + let part_text = str_join(cur_parts, "") if need_plus { - let out_tokens = native_list_append(out_tokens, make_tok("Plus", "+")) + let out_tokens = tok_append(out_tokens, "Plus", "+") } let clean_part = part_text if looks_like_code(part_text) { let clean_part = strip_code_comments(part_text) } - let out_tokens = native_list_append(out_tokens, make_tok("Str", clean_part)) + let out_tokens = tok_append(out_tokens, "Str", clean_part) let need_plus = true } - let cur_part = native_list_empty() + let cur_parts = native_list_empty() let has_interp = true // Scan brace-balanced expression source - let brace_result = scan_interp_brace(chars, next_i + 1, total) + let brace_result = scan_interp_brace(src, next_i + 1, total) let expr_src: String = brace_result["text"] let new_i: Int = brace_result["pos"] let i = new_i + let clean_start = new_i // Re-lex the expression and inline the tokens. // Wrap in ( ) so that operators inside ${} (e.g. // age + 1) are parsed as a grouped sub-expression // rather than merging with the surrounding concat // Plus tokens at the wrong precedence level. 
- let inner_toks: [Map] = lex(expr_src) + let inner_toks: [Any] = lex(expr_src) let inner_len: Int = native_list_len(inner_toks) if need_plus { - let out_tokens = native_list_append(out_tokens, make_tok("Plus", "+")) + let out_tokens = tok_append(out_tokens, "Plus", "+") } // Empty interpolation ${} => empty string segment - if inner_len <= 1 { - let out_tokens = native_list_append(out_tokens, make_tok("Str", "")) + // inner_len <= 2 = only the Eof pair (kind="Eof", value="") + if inner_len <= 2 { + let out_tokens = tok_append(out_tokens, "Str", "") } else { - let out_tokens = native_list_append(out_tokens, make_tok("LParen", "(")) + let out_tokens = tok_append(out_tokens, "LParen", "(") let out_tokens = interp_tokens_append_all(out_tokens, inner_toks) - let out_tokens = native_list_append(out_tokens, make_tok("RParen", ")")) + let out_tokens = tok_append(out_tokens, "RParen", ")") } let need_plus = true } else { - // Plain '$' not followed by '{' - treat as literal - let cur_part = native_list_append(cur_part, "$") + // Plain '$' not followed by '{' - treat as literal, continue clean run let i = i + 1 } } else { - let cur_part = native_list_append(cur_part, ch) + // Plain char — extends clean run, no append needed let i = i + 1 } } @@ -666,8 +718,11 @@ fn scan_interp_string(chars: [String], start: Int, total: Int) -> Map 0 { @@ -676,9 +731,9 @@ fn scan_interp_string(chars: [String], start: Int, total: Int) -> Map Map' = 62, '&' = 38, '|' = 124 +// '-' = 45, ':' = 58, '+' = 43, '*' = 42, '%' = 37 +// '(' = 40, ')' = 41, '{' = 123, '}' = 125, '[' = 91, ']' = 93 +// ',' = 44, '.' = 46, ';' = 59, '@' = 64, '?' = 63 -fn lex(source: String) -> [Map] { - let chars: [String] = native_string_chars(source) - let total: Int = native_list_len(chars) - let tokens: [Map] = native_list_empty() +fn lex(source: String) -> [Any] { + // Use str_char_code (returns Int) instead of str_char_at (returns strdup String) + // for all character classification in the hot loop. 
For a 400KB source, + // str_char_at allocates ~400K × 16B = ~6.4MB of temporary strings. + let total: Int = str_len(source) + let tokens: [Any] = native_list_empty() let i: Int = 0 while i < total { - let ch: String = native_list_get(chars, i) + let c: Int = str_char_code(source, i) - // Skip whitespace - if lex_is_whitespace(ch) { + // Skip whitespace (space=32, tab=9, newline=10, CR=13) + if is_ws_code(c) { let i = i + 1 } else { - // Line comments: // - if ch == "/" { + // Line comments: // (slash=47) + if c == 47 { let next_i = i + 1 if next_i < total { - let next_ch: String = native_list_get(chars, next_i) - if next_ch == "/" { - // skip to end of line + let nc: Int = str_char_code(source, next_i) + if nc == 47 { + // skip to end of line (newline=10) let i = i + 2 let running2 = true while running2 { if i >= total { let running2 = false } else { - let lch: String = native_list_get(chars, i) - if lch == "\n" { + let lc: Int = str_char_code(source, i) + if lc == 10 { let running2 = false } else { let i = i + 1 @@ -729,232 +793,254 @@ fn lex(source: String) -> [Map] { } } } else { - let tokens = native_list_append(tokens, make_tok("Slash", "/")) + let tokens = tok_append(tokens, "Slash", "/") let i = i + 1 } } else { - let tokens = native_list_append(tokens, make_tok("Slash", "/")) + let tokens = tok_append(tokens, "Slash", "/") let i = i + 1 } } else { - // String literal (plain or interpolated with ${expr} syntax). - // scan_interp_string handles both cases: plain strings emit a - // single Str token; interpolated strings emit a flat token - // sequence (Str Plus expr-tokens Plus Str ...) that the parser - // naturally assembles into a BinOp concat tree. 
- if ch == "\"" { - let interp_result = scan_interp_string(chars, i + 1, total) - let interp_toks: [Map] = interp_result["tokens"] + // String literal: '"' = 34 + if c == 34 { + let interp_result = scan_interp_string(source, i + 1, total) + let interp_toks: [Any] = interp_result["tokens"] let new_pos: Int = interp_result["pos"] let tokens = interp_tokens_append_all(tokens, interp_toks) let i = new_pos } else { - // Number literal - if lex_is_digit(ch) { - let result = scan_digits(chars, i, total) + // Number literal: '0'-'9' = 48-57 + if is_digit_code(c) { + let result = scan_digits(source, i, total) let num_text: String = result["text"] let new_pos: Int = result["pos"] - // check for float (dot followed by digit) + // check for float (dot=46 followed by digit) if new_pos < total { - let dot_ch: String = native_list_get(chars, new_pos) - if dot_ch == "." { + let dc: Int = str_char_code(source, new_pos) + if dc == 46 { let after_dot = new_pos + 1 if after_dot < total { - let after_dot_ch: String = native_list_get(chars, after_dot) - if lex_is_digit(after_dot_ch) { - let frac_result = scan_digits(chars, after_dot, total) + let adc: Int = str_char_code(source, after_dot) + if is_digit_code(adc) { + let frac_result = scan_digits(source, after_dot, total) let frac_text: String = frac_result["text"] let frac_pos: Int = frac_result["pos"] - let tokens = native_list_append(tokens, make_tok("Float", num_text + "." + frac_text)) + let tokens = tok_append(tokens, "Float", num_text + "." 
+ frac_text) let i = frac_pos } else { - let tokens = native_list_append(tokens, make_tok("Int", num_text)) + let tokens = tok_append(tokens, "Int", num_text) let i = new_pos } } else { - let tokens = native_list_append(tokens, make_tok("Int", num_text)) + let tokens = tok_append(tokens, "Int", num_text) let i = new_pos } } else { - let tokens = native_list_append(tokens, make_tok("Int", num_text)) + let tokens = tok_append(tokens, "Int", num_text) let i = new_pos } } else { - let tokens = native_list_append(tokens, make_tok("Int", num_text)) + let tokens = tok_append(tokens, "Int", num_text) let i = new_pos } } else { - // Identifier or keyword - if lex_is_alpha(ch) || ch == "_" { - let result = scan_ident(chars, i, total) + // Identifier or keyword: alpha or '_'=95 + if is_alpha_code(c) || c == 95 { + let result = scan_ident(source, i, total) let word: String = result["text"] let new_pos: Int = result["pos"] let kw = keyword_kind(word) if kw == "" { - let tokens = native_list_append(tokens, make_tok("Ident", word)) + let tokens = tok_append(tokens, "Ident", word) } else { - let tokens = native_list_append(tokens, make_tok(kw, word)) + let tokens = tok_append(tokens, kw, word) } let i = new_pos } else { // Multi-char and single-char operators/delimiters let peek_i = i + 1 - let peek_ch = "" + let peek_c: Int = -1 if peek_i < total { - let peek_ch: String = native_list_get(chars, peek_i) + let peek_c: Int = str_char_code(source, peek_i) } - if ch == "=" { - if peek_ch == "=" { - let tokens = native_list_append(tokens, make_tok("EqEq", "==")) + if c == 61 { + // '=' = 61 + if peek_c == 61 { + let tokens = tok_append(tokens, "EqEq", "==") let i = i + 2 } else { - if peek_ch == ">" { - let tokens = native_list_append(tokens, make_tok("FatArrow", "=>")) + if peek_c == 62 { + // '>' = 62 + let tokens = tok_append(tokens, "FatArrow", "=>") let i = i + 2 } else { - let tokens = native_list_append(tokens, make_tok("Eq", "=")) + let tokens = tok_append(tokens, "Eq", "=") 
let i = i + 1 } } } else { - if ch == "!" { - if peek_ch == "=" { - let tokens = native_list_append(tokens, make_tok("NotEq", "!=")) + if c == 33 { + // '!' = 33 + if peek_c == 61 { + let tokens = tok_append(tokens, "NotEq", "!=") let i = i + 2 } else { - let tokens = native_list_append(tokens, make_tok("Not", "!")) + let tokens = tok_append(tokens, "Not", "!") let i = i + 1 } } else { - if ch == "<" { - if peek_ch == "=" { - let tokens = native_list_append(tokens, make_tok("LtEq", "<=")) + if c == 60 { + // '<' = 60 + if peek_c == 61 { + let tokens = tok_append(tokens, "LtEq", "<=") let i = i + 2 } else { - let tokens = native_list_append(tokens, make_tok("Lt", "<")) + let tokens = tok_append(tokens, "Lt", "<") let i = i + 1 } } else { - if ch == ">" { - if peek_ch == "=" { - let tokens = native_list_append(tokens, make_tok("GtEq", ">=")) + if c == 62 { + // '>' = 62 + if peek_c == 61 { + let tokens = tok_append(tokens, "GtEq", ">=") let i = i + 2 } else { - let tokens = native_list_append(tokens, make_tok("Gt", ">")) + let tokens = tok_append(tokens, "Gt", ">") let i = i + 1 } } else { - if ch == "&" { - if peek_ch == "&" { - let tokens = native_list_append(tokens, make_tok("And", "&&")) + if c == 38 { + // '&' = 38 + if peek_c == 38 { + let tokens = tok_append(tokens, "And", "&&") let i = i + 2 } else { let i = i + 1 } } else { - if ch == "|" { - if peek_ch == "|" { - let tokens = native_list_append(tokens, make_tok("Or", "||")) + if c == 124 { + // '|' = 124 + if peek_c == 124 { + let tokens = tok_append(tokens, "Or", "||") let i = i + 2 } else { - if peek_ch == ">" { - let tokens = native_list_append(tokens, make_tok("PipeOp", "|>")) + if peek_c == 62 { + // '>' = 62 + let tokens = tok_append(tokens, "PipeOp", "|>") let i = i + 2 } else { - let tokens = native_list_append(tokens, make_tok("Pipe", "|")) + let tokens = tok_append(tokens, "Pipe", "|") let i = i + 1 } } } else { - if ch == "-" { - if peek_ch == ">" { - let tokens = native_list_append(tokens, 
make_tok("Arrow", "->")) + if c == 45 { + // '-' = 45 + if peek_c == 62 { + // '>' = 62 + let tokens = tok_append(tokens, "Arrow", "->") let i = i + 2 } else { - let tokens = native_list_append(tokens, make_tok("Minus", "-")) + let tokens = tok_append(tokens, "Minus", "-") let i = i + 1 } } else { - if ch == ":" { - if peek_ch == ":" { - let tokens = native_list_append(tokens, make_tok("ColonColon", "::")) + if c == 58 { + // ':' = 58 + if peek_c == 58 { + let tokens = tok_append(tokens, "ColonColon", "::") let i = i + 2 } else { - let tokens = native_list_append(tokens, make_tok("Colon", ":")) + let tokens = tok_append(tokens, "Colon", ":") let i = i + 1 } } else { - if ch == "+" { - let tokens = native_list_append(tokens, make_tok("Plus", "+")) + if c == 43 { + // '+' = 43 + let tokens = tok_append(tokens, "Plus", "+") let i = i + 1 } else { - if ch == "*" { - let tokens = native_list_append(tokens, make_tok("Star", "*")) + if c == 42 { + // '*' = 42 + let tokens = tok_append(tokens, "Star", "*") let i = i + 1 } else { - if ch == "%" { - let tokens = native_list_append(tokens, make_tok("Percent", "%")) + if c == 37 { + // '%' = 37 + let tokens = tok_append(tokens, "Percent", "%") let i = i + 1 } else { - if ch == "(" { - let tokens = native_list_append(tokens, make_tok("LParen", "(")) + if c == 40 { + // '(' = 40 + let tokens = tok_append(tokens, "LParen", "(") let i = i + 1 } else { - if ch == ")" { - let tokens = native_list_append(tokens, make_tok("RParen", ")")) + if c == 41 { + // ')' = 41 + let tokens = tok_append(tokens, "RParen", ")") let i = i + 1 } else { - if ch == "{" { - let tokens = native_list_append(tokens, make_tok("LBrace", "{")) + if c == 123 { + // '{' = 123 + let tokens = tok_append(tokens, "LBrace", "{") let i = i + 1 } else { - if ch == "}" { - let tokens = native_list_append(tokens, make_tok("RBrace", "}")) + if c == 125 { + // '}' = 125 + let tokens = tok_append(tokens, "RBrace", "}") let i = i + 1 } else { - if ch == "[" { - let tokens = 
native_list_append(tokens, make_tok("LBracket", "[")) + if c == 91 { + // '[' = 91 + let tokens = tok_append(tokens, "LBracket", "[") let i = i + 1 } else { - if ch == "]" { - let tokens = native_list_append(tokens, make_tok("RBracket", "]")) + if c == 93 { + // ']' = 93 + let tokens = tok_append(tokens, "RBracket", "]") let i = i + 1 } else { - if ch == "," { - let tokens = native_list_append(tokens, make_tok("Comma", ",")) + if c == 44 { + // ',' = 44 + let tokens = tok_append(tokens, "Comma", ",") let i = i + 1 } else { - if ch == "." { - // Check for ..= (inclusive range) before .. (exclusive range) before single . + if c == 46 { + // '.' = 46: check for ..= or .. let peek2_i = i + 2 - let peek2_ch = "" + let peek2_c: Int = -1 if peek2_i < total { - let peek2_ch: String = native_list_get(chars, peek2_i) + let peek2_c: Int = str_char_code(source, peek2_i) } - if peek_ch == "." { - if peek2_ch == "=" { - let tokens = native_list_append(tokens, make_tok("DotDotEq", "..=")) + if peek_c == 46 { + // '..' prefix + if peek2_c == 61 { + // '..=' = 46 46 61 + let tokens = tok_append(tokens, "DotDotEq", "..=") let i = i + 3 } else { - let tokens = native_list_append(tokens, make_tok("DotDot", "..")) + let tokens = tok_append(tokens, "DotDot", "..") let i = i + 2 } } else { - let tokens = native_list_append(tokens, make_tok("Dot", ".")) + let tokens = tok_append(tokens, "Dot", ".") let i = i + 1 } } else { - if ch == ";" { - let tokens = native_list_append(tokens, make_tok("Semicolon", ";")) + if c == 59 { + // ';' = 59 + let tokens = tok_append(tokens, "Semicolon", ";") let i = i + 1 } else { - if ch == "@" { - let tokens = native_list_append(tokens, make_tok("At", "@")) + if c == 64 { + // '@' = 64 + let tokens = tok_append(tokens, "At", "@") let i = i + 1 } else { - if ch == "?" { - let tokens = native_list_append(tokens, make_tok("QuestionMark", "?")) + if c == 63 { + // '?' 
= 63 + let tokens = tok_append(tokens, "QuestionMark", "?") let i = i + 1 } else { // unknown char - skip @@ -988,6 +1074,6 @@ fn lex(source: String) -> [Map] { } } - let tokens = native_list_append(tokens, make_tok("Eof", "")) + let tokens = tok_append(tokens, "Eof", "") tokens } diff --git a/lang/el-compiler/src/parser.el b/lang/el-compiler/src/parser.el index 62fda8c..025ac90 100644 --- a/lang/el-compiler/src/parser.el +++ b/lang/el-compiler/src/parser.el @@ -9,25 +9,28 @@ // The token list is passed as a parameter to all parse functions. // native_list_get is used to index into it without cloning. // -// Entry point: fn parse(tokens: [Map]) -> [Map] +// Entry point: fn parse(tokens: [Any]) -> [Map] // -- Token access helpers ------------------------------------------------------ +// Tokens is a flat [Any] list: tokens[2*i] = kind, tokens[2*i+1] = value. +// This avoids one ElMap allocation per token (~112B each), saving ~4MB on large +// programs. All callers use these helpers -- only these three need updating. -fn tok_at(tokens: [Map], pos: Int) -> Map { - native_list_get(tokens, pos) +fn tok_at(tokens: [Any], pos: Int) -> Map { + let kind: String = native_list_get(tokens, pos * 2) + let value: String = native_list_get(tokens, pos * 2 + 1) + { "kind": kind, "value": value } } -fn tok_kind(tokens: [Map], pos: Int) -> String { - let t = native_list_get(tokens, pos) - t["kind"] +fn tok_kind(tokens: [Any], pos: Int) -> String { + native_list_get(tokens, pos * 2) } -fn tok_value(tokens: [Map], pos: Int) -> String { - let t = native_list_get(tokens, pos) - t["value"] +fn tok_value(tokens: [Any], pos: Int) -> String { + native_list_get(tokens, pos * 2 + 1) } -fn expect(tokens: [Map], pos: Int, kind: String) -> Int { +fn expect(tokens: [Any], pos: Int, kind: String) -> Int { let k = tok_kind(tokens, pos) if k == kind { return pos + 1 @@ -46,7 +49,7 @@ fn make_result(node: Map, pos: Int) -> Map { // Skips over a type annotation, returning the new position. 
// Types can be: Ident, [Type], Map, Type?, Type -fn skip_type(tokens: [Map], pos: Int) -> Int { +fn skip_type(tokens: [Any], pos: Int) -> Int { let k = tok_kind(tokens, pos) // Array type: [Type] if k == "LBracket" { @@ -103,7 +106,7 @@ fn skip_type(tokens: [Map], pos: Int) -> Int { // -- Parameter list ------------------------------------------------------------ // Parses (name: Type, name: Type, ...) - returns { "params": [...], "pos": ... } -fn parse_params(tokens: [Map], pos: Int) -> Map { +fn parse_params(tokens: [Any], pos: Int) -> Map { let p = expect(tokens, pos, "LParen") let params: [Map] = native_list_empty() let running = true @@ -292,7 +295,7 @@ fn is_void_element(name: String) -> Bool { // Collect tokens as text content until we hit Lt, LBrace, Eof, or a // closing-tag marker (Lt Slash). Returns { "text": "...", "pos": p } -fn parse_html_text_tokens(tokens: [Map], pos: Int) -> Map { +fn parse_html_text_tokens(tokens: [Any], pos: Int) -> Map { let parts: [String] = native_list_empty() let p = pos let running = true @@ -322,7 +325,7 @@ fn parse_html_text_tokens(tokens: [Map], pos: Int) -> Map). -fn parse_html_attrs(tokens: [Map], pos: Int) -> Map { +fn parse_html_attrs(tokens: [Any], pos: Int) -> Map { let attrs: [Map] = native_list_empty() let p = pos let running = true @@ -355,6 +358,8 @@ fn parse_html_attrs(tokens: [Map], pos: Int) -> Map { let r = parse_expr(tokens, p + 1) let val_node = r["node"] let p = r["pos"] + // r result map fully consumed — release to free peak heap. + el_release(r) let p = expect(tokens, p, "RBrace") let attrs = native_list_append(attrs, { "name": attr_name, "kind": "dynamic", "value": val_node }) } else { @@ -374,7 +379,7 @@ fn parse_html_attrs(tokens: [Map], pos: Int) -> Map { // Parse the children of an HTML element until we see the closing tag // or EOF. 
Returns { "children": [...], "pos": p_after_closing_tag } -fn parse_html_children(tokens: [Map], pos: Int, parent_tag: String) -> Map { +fn parse_html_children(tokens: [Any], pos: Int, parent_tag: String) -> Map { let children: [Map] = native_list_empty() let p = pos let running = true @@ -423,6 +428,8 @@ fn parse_html_children(tokens: [Map], pos: Int, parent_tag: String) let r = parse_html_element(tokens, p) let child = r["node"] let p = r["pos"] + // r result map fully consumed — release to free peak heap. + el_release(r) let children = native_list_append(children, child) } } @@ -442,6 +449,8 @@ fn parse_html_children(tokens: [Map], pos: Int, parent_tag: String) state_set("__no_block_expr", prev_no_block) let list_expr = r_list["node"] let p = r_list["pos"] + // r_list result map fully consumed — release to free peak heap. + el_release(r_list) // expect "as" let p = expect(tokens, p, "As") // item variable name @@ -453,6 +462,8 @@ fn parse_html_children(tokens: [Map], pos: Int, parent_tag: String) let r_body = parse_html_each_body(tokens, p) let body_children = r_body["children"] let p = r_body["pos"] + // r_body result map fully consumed — release to free peak heap. + el_release(r_body) let each_node: Map = { "html": "Each", "list": list_expr, "item": item_name, "body": body_children } let children = native_list_append(children, each_node) } else { @@ -473,6 +484,8 @@ fn parse_html_children(tokens: [Map], pos: Int, parent_tag: String) let r = parse_expr(tokens, p + 1) let interp_val = r["node"] let p = r["pos"] + // r result map fully consumed — release to free peak heap. + el_release(r) let p = expect(tokens, p, "RBrace") // Check if the expr is a call to raw() let is_raw_call = false @@ -501,6 +514,8 @@ fn parse_html_children(tokens: [Map], pos: Int, parent_tag: String) let r_text = parse_html_text_tokens(tokens, p) let text_str: String = r_text["text"] let p = r_text["pos"] + // r_text result map fully consumed — release to free peak heap. 
+ el_release(r_text) let text_trimmed: String = str_trim(text_str) if !str_eq(text_trimmed, "") { let children = native_list_append(children, { "html": "Text", "text": text_trimmed }) @@ -514,14 +529,14 @@ fn parse_html_children(tokens: [Map], pos: Int, parent_tag: String) // Parse body of {#each} until {/each}. Mirrors parse_html_children but // stops at the {/each} sentinel rather than a closing element tag. -fn parse_html_each_body(tokens: [Map], pos: Int) -> Map { +fn parse_html_each_body(tokens: [Any], pos: Int) -> Map { parse_html_children(tokens, pos, "__each__") } // Parse a single HTML element: children // or self-closing: // Pos points to the Lt token. -fn parse_html_element(tokens: [Map], pos: Int) -> Map { +fn parse_html_element(tokens: [Any], pos: Int) -> Map { let p = pos // consume < let p = expect(tokens, p, "Lt") @@ -532,6 +547,8 @@ fn parse_html_element(tokens: [Map], pos: Int) -> Map let r_attrs = parse_html_attrs(tokens, p) let attrs = r_attrs["attrs"] let p = r_attrs["pos"] + // r_attrs result map fully consumed — release to free peak heap. + el_release(r_attrs) // check for self-closing /> or void element let k = tok_kind(tokens, p) let self_closing = false @@ -552,13 +569,15 @@ fn parse_html_element(tokens: [Map], pos: Int) -> Map let r_children = parse_html_children(tokens, p, tag_name) let children = r_children["children"] let p = r_children["pos"] + // r_children result map fully consumed — release to free peak heap. + el_release(r_children) make_result({ "html": "Element", "tag": tag_name, "attrs": attrs, "children": children, "self_closing": false }, p) } // Entry point for HTML template parsing. // Pos points to Lt (or Lt Not for ). // May parse an optional prefix followed by the root element. 
-fn parse_html_template(tokens: [Map], pos: Int) -> Map { +fn parse_html_template(tokens: [Any], pos: Int) -> Map { let p = pos // Check for let doctype = false @@ -589,6 +608,8 @@ fn parse_html_template(tokens: [Map], pos: Int) -> Map let r = parse_html_element(tokens, p) let root = r["node"] let p = r["pos"] + // r result map fully consumed — release to free peak heap. + el_release(r) let root_with_doctype = root if doctype { let root_with_doctype = { "html": root["html"], "tag": root["tag"], "attrs": root["attrs"], "children": root["children"], "self_closing": root["self_closing"], "doctype": true } @@ -596,7 +617,7 @@ fn parse_html_template(tokens: [Map], pos: Int) -> Map make_result({ "expr": "HtmlTemplate", "root": root_with_doctype }, p) } -fn parse_primary(tokens: [Map], pos: Int) -> Map { +fn parse_primary(tokens: [Any], pos: Int) -> Map { let k = tok_kind(tokens, pos) let v = tok_value(tokens, pos) @@ -646,6 +667,8 @@ fn parse_primary(tokens: [Map], pos: Int) -> Map { let r = parse_expr(tokens, pos + 1) let node = r["node"] let p = r["pos"] + // r result map fully consumed — release to free peak heap. + el_release(r) let p = expect(tokens, p, "RParen") return make_result(node, p) } @@ -666,6 +689,8 @@ fn parse_primary(tokens: [Map], pos: Int) -> Map { let r = parse_expr(tokens, p) let elem = r["node"] let p = r["pos"] + // r result map fully consumed — release to free peak heap. + el_release(r) let elems = native_list_append(elems, elem) let k3 = tok_kind(tokens, p) if k3 == "Comma" { @@ -711,6 +736,8 @@ fn parse_primary(tokens: [Map], pos: Int) -> Map { let r = parse_expr(tokens, new_p) let val_node = r["node"] let new_p = r["pos"] + // r result map fully consumed — release to free peak heap. 
+ el_release(r) let pair = { "key": key, "value": val_node } let pairs = native_list_append(pairs, pair) let k3 = tok_kind(tokens, new_p) @@ -757,6 +784,8 @@ fn parse_primary(tokens: [Map], pos: Int) -> Map { let r = parse_params(tokens, p) let params = r["params"] let p = r["pos"] + // r result map fully consumed — release to free peak heap. + el_release(r) let ret_type = "" let k2 = tok_kind(tokens, p) if k2 == "Arrow" { @@ -770,6 +799,8 @@ fn parse_primary(tokens: [Map], pos: Int) -> Map { let r2 = parse_block(tokens, p) let body = r2["stmts"] let p = r2["pos"] + // r2 result map fully consumed — release to free peak heap. + el_release(r2) return make_result({ "expr": "Lambda", "params": params, "body": body, "ret_type": ret_type }, p) } @@ -778,6 +809,8 @@ fn parse_primary(tokens: [Map], pos: Int) -> Map { let r = parse_primary(tokens, pos + 1) let inner = r["node"] let p = r["pos"] + // r result map fully consumed — release to free peak heap. + el_release(r) return make_result({ "expr": "Not", "inner": inner }, p) } @@ -786,6 +819,8 @@ fn parse_primary(tokens: [Map], pos: Int) -> Map { let r = parse_primary(tokens, pos + 1) let inner = r["node"] let p = r["pos"] + // r result map fully consumed — release to free peak heap. + el_release(r) return make_result({ "expr": "Neg", "inner": inner }, p) } @@ -819,7 +854,7 @@ fn parse_primary(tokens: [Map], pos: Int) -> Map { make_result({ "expr": "Nil" }, pos + 1) } -fn parse_if(tokens: [Map], pos: Int) -> Map { +fn parse_if(tokens: [Any], pos: Int) -> Map { let p = expect(tokens, pos, "If") // Suppress Map-literal parsing in the cond so a stray `{` (the start // of the then-block) isn't gobbled as a Map. @@ -829,9 +864,13 @@ fn parse_if(tokens: [Map], pos: Int) -> Map { state_set("__no_block_expr", prev_no_block) let cond = r["node"] let p = r["pos"] + // r result map fully consumed — release to free peak heap. 
+ el_release(r) let r2 = parse_block(tokens, p) let then_stmts = r2["stmts"] let p = r2["pos"] + // r2 result map fully consumed — release to free peak heap. + el_release(r2) let has_else = false let else_stmts: [Map] = native_list_empty() let k2 = tok_kind(tokens, p) @@ -843,19 +882,23 @@ fn parse_if(tokens: [Map], pos: Int) -> Map { let r3 = parse_if(tokens, p) let nested = r3["node"] let p = r3["pos"] + // r3 result map fully consumed — release to free peak heap. + el_release(r3) let else_stmts = native_list_append(else_stmts, { "stmt": "Expr", "value": nested }) let has_else = true } else { let r3 = parse_block(tokens, p) let else_stmts = r3["stmts"] let p = r3["pos"] + // r3 result map fully consumed — release to free peak heap. + el_release(r3) let has_else = true } } make_result({ "expr": "If", "cond": cond, "then": then_stmts, "else": else_stmts, "has_else": has_else }, p) } -fn parse_match(tokens: [Map], pos: Int) -> Map { +fn parse_match(tokens: [Any], pos: Int) -> Map { let p = expect(tokens, pos, "Match") let prev_no_block: String = state_get("__no_block_expr") state_set("__no_block_expr", "1") @@ -863,6 +906,8 @@ fn parse_match(tokens: [Map], pos: Int) -> Map { state_set("__no_block_expr", prev_no_block) let subject = r["node"] let p = r["pos"] + // r result map fully consumed — release to free peak heap. + el_release(r) let p = expect(tokens, p, "LBrace") let arms: [Map] = native_list_empty() let running = true @@ -878,10 +923,14 @@ fn parse_match(tokens: [Map], pos: Int) -> Map { let r2 = parse_pattern(tokens, p) let pattern = r2["node"] let p = r2["pos"] + // r2 result map fully consumed — release to free peak heap. + el_release(r2) let p = expect(tokens, p, "FatArrow") let r3 = parse_expr(tokens, p) let body = r3["node"] let p = r3["pos"] + // r3 result map fully consumed — release to free peak heap. 
+ el_release(r3) let arm = { "pattern": pattern, "body": body } let arms = native_list_append(arms, arm) let k2 = tok_kind(tokens, p) @@ -895,7 +944,7 @@ fn parse_match(tokens: [Map], pos: Int) -> Map { make_result({ "expr": "Match", "subject": subject, "arms": arms }, p) } -fn parse_pattern(tokens: [Map], pos: Int) -> Map { +fn parse_pattern(tokens: [Any], pos: Int) -> Map { let k = tok_kind(tokens, pos) if k == "Ident" { let v = tok_value(tokens, pos) @@ -924,7 +973,7 @@ fn parse_pattern(tokens: [Map], pos: Int) -> Map { make_result({ "pattern": "Wildcard" }, pos + 1) } -fn parse_for_expr(tokens: [Map], pos: Int) -> Map { +fn parse_for_expr(tokens: [Any], pos: Int) -> Map { let p = expect(tokens, pos, "For") let item_name = tok_value(tokens, p) let p = p + 1 @@ -935,13 +984,17 @@ fn parse_for_expr(tokens: [Map], pos: Int) -> Map { state_set("__no_block_expr", prev_no_block) let list_expr = r["node"] let p = r["pos"] + // r result map fully consumed — release to free peak heap. + el_release(r) let r2 = parse_block(tokens, p) let body = r2["stmts"] let p = r2["pos"] + // r2 result map fully consumed — release to free peak heap. + el_release(r2) make_result({ "expr": "For", "item": item_name, "list": list_expr, "body": body }, p) } -fn parse_block(tokens: [Map], pos: Int) -> Map { +fn parse_block(tokens: [Any], pos: Int) -> Map { let p = expect(tokens, pos, "LBrace") let stmts: [Map] = native_list_empty() let running = true @@ -956,6 +1009,8 @@ fn parse_block(tokens: [Map], pos: Int) -> Map { let r = parse_stmt(tokens, p) let stmt = r["node"] let new_p: Int = r["pos"] + // r result map fully consumed — release to free peak heap. + el_release(r) let stmts = native_list_append(stmts, stmt) // Non-progress guard: a malformed input (e.g. 
`||` that // dragged the parser into Map-literal mode partway through @@ -998,10 +1053,12 @@ fn is_duration_unit(name: String) -> Bool { false } -fn parse_postfix(tokens: [Map], pos: Int) -> Map { +fn parse_postfix(tokens: [Any], pos: Int) -> Map { let r = parse_primary(tokens, pos) let node = r["node"] let p = r["pos"] + // r result map fully consumed — release to free peak heap. + el_release(r) // Postfix duration literal: `.` where is one of // nanos | millis | seconds | minutes | hours | days (each with an @@ -1043,6 +1100,8 @@ fn parse_postfix(tokens: [Map], pos: Int) -> Map { let r2 = parse_expr(tokens, p) let arg = r2["node"] let p = r2["pos"] + // r2 result map fully consumed — release to free peak heap. + el_release(r2) let args = native_list_append(args, arg) let k3 = tok_kind(tokens, p) if k3 == "Comma" { @@ -1063,6 +1122,8 @@ fn parse_postfix(tokens: [Map], pos: Int) -> Map { let r2 = parse_expr(tokens, p + 1) let idx = r2["node"] let p = r2["pos"] + // r2 result map fully consumed — release to free peak heap. + el_release(r2) let p = expect(tokens, p, "RBracket") let node = { "expr": "Index", "object": node, "index": idx } } else { @@ -1115,10 +1176,12 @@ fn is_binop(kind: String) -> Bool { false } -fn parse_binop(tokens: [Map], pos: Int, min_prec: Int) -> Map { +fn parse_binop(tokens: [Any], pos: Int, min_prec: Int) -> Map { let r = parse_postfix(tokens, pos) let left = r["node"] let p = r["pos"] + // r result map fully consumed — release to free peak heap. 
+ el_release(r) let running = true while running { let k = tok_kind(tokens, p) @@ -1129,6 +1192,8 @@ fn parse_binop(tokens: [Map], pos: Int, min_prec: Int) -> Map], pos: Int, min_prec: Int) -> Map], pos: Int) -> Map { +fn parse_expr(tokens: [Any], pos: Int) -> Map { parse_binop(tokens, pos, 1) } // -- Statement parsing --------------------------------------------------------- -fn parse_stmt(tokens: [Map], pos: Int) -> Map { +fn parse_stmt(tokens: [Any], pos: Int) -> Map { let k = tok_kind(tokens, pos) // let binding @@ -1171,6 +1236,8 @@ fn parse_stmt(tokens: [Map], pos: Int) -> Map { let r = parse_expr(tokens, p) let val = r["node"] let p = r["pos"] + // r result map fully consumed — release to free peak heap. + el_release(r) return make_result({ "stmt": "Let", "name": name, "value": val, "type": ltype }, p) } @@ -1187,6 +1254,8 @@ fn parse_stmt(tokens: [Map], pos: Int) -> Map { let r = parse_expr(tokens, p) let val = r["node"] let p = r["pos"] + // r result map fully consumed — release to free peak heap. + el_release(r) return make_result({ "stmt": "Return", "value": val }, p) } @@ -1201,6 +1270,8 @@ fn parse_stmt(tokens: [Map], pos: Int) -> Map { let r = parse_params(tokens, p) let params = r["params"] let p = r["pos"] + // r result map fully consumed — release to free peak heap. + el_release(r) let ret_type = "" let k3: String = tok_kind(tokens, p) if str_eq(k3, "Arrow") { @@ -1221,6 +1292,8 @@ fn parse_stmt(tokens: [Map], pos: Int) -> Map { let r = parse_params(tokens, p) let params = r["params"] let p = r["pos"] + // r result map fully consumed — release to free peak heap. + el_release(r) // return type annotation: -> Type. Capture the leading identifier // so codegen can distinguish Void-returning functions from value- // returning ones. Anything not "Void" is treated as a value type. 
@@ -1237,6 +1310,8 @@ fn parse_stmt(tokens: [Map], pos: Int) -> Map { let r2 = parse_block(tokens, p) let body = r2["stmts"] let p = r2["pos"] + // r2 result map fully consumed — release to free peak heap. + el_release(r2) return make_result({ "stmt": "FnDef", "name": name, "params": params, "body": body, "ret_type": ret_type }, p) } @@ -1360,9 +1435,13 @@ fn parse_stmt(tokens: [Map], pos: Int) -> Map { state_set("__no_block_expr", prev_no_block) let cond = r["node"] let p = r["pos"] + // r result map fully consumed — release to free peak heap. + el_release(r) let r2 = parse_block(tokens, p) let body = r2["stmts"] let p = r2["pos"] + // r2 result map fully consumed — release to free peak heap. + el_release(r2) return make_result({ "stmt": "While", "cond": cond, "body": body }, p) } @@ -1388,6 +1467,8 @@ fn parse_stmt(tokens: [Map], pos: Int) -> Map { state_set("__no_block_expr", prev_no_block) let start_expr = r["node"] let p = r["pos"] + // r result map fully consumed — release to free peak heap. + el_release(r) // Check for range operator: .. 
(exclusive) or ..= (inclusive) let range_k = tok_kind(tokens, p) if range_k == "DotDot" { @@ -1396,9 +1477,11 @@ fn parse_stmt(tokens: [Map], pos: Int) -> Map { let r2 = parse_expr(tokens, p) let end_expr = r2["node"] let p = r2["pos"] + el_release(r2) let r3 = parse_block(tokens, p) let body = r3["stmts"] let p = r3["pos"] + el_release(r3) return make_result({ "stmt": "ForRange", "var": item_name, "start": start_expr, "end": end_expr, "inclusive": false, "body": body }, p) } if range_k == "DotDotEq" { @@ -1407,9 +1490,11 @@ fn parse_stmt(tokens: [Map], pos: Int) -> Map { let r2 = parse_expr(tokens, p) let end_expr = r2["node"] let p = r2["pos"] + el_release(r2) let r3 = parse_block(tokens, p) let body = r3["stmts"] let p = r3["pos"] + el_release(r3) return make_result({ "stmt": "ForRange", "var": item_name, "start": start_expr, "end": end_expr, "inclusive": true, "body": body }, p) } // No range operator: regular for-in (list iteration) @@ -1417,6 +1502,7 @@ fn parse_stmt(tokens: [Map], pos: Int) -> Map { let r2 = parse_block(tokens, p) let body = r2["stmts"] let p = r2["pos"] + el_release(r2) return make_result({ "stmt": "For", "item": item_name, "list": list_expr, "body": body }, p) } @@ -1428,6 +1514,8 @@ fn parse_stmt(tokens: [Map], pos: Int) -> Map { let r_try = parse_block(tokens, p) let try_body = r_try["stmts"] let p = r_try["pos"] + // r_try result map fully consumed — release to free peak heap. + el_release(r_try) let catch_name = "err" let k2 = tok_kind(tokens, p) if str_eq(k2, "Catch") { @@ -1449,6 +1537,8 @@ fn parse_stmt(tokens: [Map], pos: Int) -> Map { let r_catch = parse_block(tokens, p) let catch_body = r_catch["stmts"] let p = r_catch["pos"] + // r_catch result map fully consumed — release to free peak heap. 
+ el_release(r_catch) return make_result({ "stmt": "TryCatch", "try_body": try_body, "catch_name": catch_name, "catch_body": catch_body }, p) } return make_result({ "stmt": "TryCatch", "try_body": try_body, "catch_name": catch_name, "catch_body": native_list_empty() }, p) @@ -1472,6 +1562,8 @@ fn parse_stmt(tokens: [Map], pos: Int) -> Map { "ret_type": inner["ret_type"], "decorator": dec_name } + // r result map fully consumed — release to free peak heap. + el_release(r) return make_result(with_dec, p2) } return r @@ -1592,6 +1684,28 @@ fn parse_stmt(tokens: [Map], pos: Int) -> Map { }, p) } + // assert [ , ] + // The message is optional — if the next token after the condition is not a + // Comma, emit an empty string placeholder so the test still works. + if k == "Assert" { + let p: Int = pos + 1 + let cond_r = parse_expr(tokens, p) + let cond_node = cond_r["node"] + let p: Int = cond_r["pos"] + el_release(cond_r) + let after_k: String = tok_kind(tokens, p) + if str_eq(after_k, "Comma") { + let p = p + 1 + let msg_r = parse_expr(tokens, p) + let msg_node = msg_r["node"] + let p: Int = msg_r["pos"] + el_release(msg_r) + return make_result({ "stmt": "Assert", "cond": cond_node, "msg": msg_node }, p) + } + // No message — use empty string placeholder. + return make_result({ "stmt": "Assert", "cond": cond_node, "msg": { "expr": "Str", "value": "" } }, p) + } + // Bare reassignment: `name = expr`. Handled BEFORE the expression // fallback so we don't drop the assign on the floor and emit three // orphan expressions (the original silent-miscompile bug). El's `let` @@ -1606,6 +1720,8 @@ fn parse_stmt(tokens: [Map], pos: Int) -> Map { let r = parse_expr(tokens, p) let val = r["node"] let p = r["pos"] + // r result map fully consumed — release to free peak heap. 
+ el_release(r) return make_result({ "stmt": "Assign", "name": name, "value": val }, p) } } @@ -1614,13 +1730,16 @@ fn parse_stmt(tokens: [Map], pos: Int) -> Map { let r = parse_expr(tokens, pos) let val = r["node"] let p = r["pos"] + // r result map fully consumed — release to free peak heap. + el_release(r) make_result({ "stmt": "Expr", "value": val }, p) } // -- Top-level parse ------------------------------------------------------------ -fn parse(tokens: [Map]) -> [Map] { - let total: Int = native_list_len(tokens) +fn parse(tokens: [Any]) -> [Map] { + // Flat list: 2 entries per token, so divide by 2 for token count. + let total: Int = native_list_len(tokens) / 2 let stmts: [Map] = native_list_empty() let pos: Int = 0 let running = true @@ -1635,6 +1754,8 @@ fn parse(tokens: [Map]) -> [Map] { let r = parse_stmt(tokens, pos) let stmt = r["node"] let new_pos: Int = r["pos"] + // r result map fully consumed — release to free peak heap. + el_release(r) let stmts = native_list_append(stmts, stmt) // Guard against infinite loops - if pos didn't advance, force it if new_pos <= pos { @@ -1647,3 +1768,297 @@ fn parse(tokens: [Map]) -> [Map] { } stmts } + +// -- Streaming parse helpers --------------------------------------------------- + +// parse_one — parse exactly one top-level statement at position `pos`. +// Returns { "node": stmt_map, "pos": new_pos }. +// Enables the streaming compiler pipeline (parse one → emit C → discard). +fn parse_one(tokens: [Any], pos: Int) -> Map { + parse_stmt(tokens, pos) +} + +// skip_to_rbrace — advance past a balanced { ... } block. +// On entry, pos must point at the LBrace token. +// Returns the position of the token AFTER the matching RBrace. 
+fn skip_to_rbrace(tokens: [Any], pos: Int) -> Int { + let total: Int = native_list_len(tokens) / 2 + let p: Int = pos + 1 + let depth: Int = 1 + let going: Bool = true + while going { + if p >= total { + let going = false + } else { + let kk: String = tok_kind(tokens, p) + if str_eq(kk, "Eof") { + let going = false + } else { + if str_eq(kk, "LBrace") { + let depth = depth + 1 + let p = p + 1 + } else { + if str_eq(kk, "RBrace") { + let depth = depth - 1 + let p = p + 1 + if depth <= 0 { + let going = false + } + } else { + let p = p + 1 + } + } + } + } + } + p +} + +// is_stmt_start_kind — true if `k` is a token kind that can start a new +// top-level statement (used to find safe stopping points during token skips). +fn is_stmt_start_kind(k: String) -> Bool { + if str_eq(k, "Fn") { return true } + if str_eq(k, "Let") { return true } + if str_eq(k, "Extern") { return true } + if str_eq(k, "Cgi") { return true } + if str_eq(k, "Service") { return true } + if str_eq(k, "Type") { return true } + if str_eq(k, "Enum") { return true } + if str_eq(k, "Import") { return true } + if str_eq(k, "From") { return true } + if str_eq(k, "Eof") { return true } + false +} + +// skip_expr_to_stmt_boundary — skip tokens from `pos` until we reach a +// token that could start a new top-level statement, staying depth-aware +// so that braces inside expressions don't fool us. 
+fn skip_expr_to_stmt_boundary(tokens: [Any], pos: Int) -> Int { + let total: Int = native_list_len(tokens) / 2 + let p: Int = pos + let depth: Int = 0 + let going: Bool = true + while going { + if p >= total { + let going = false + } else { + let kk: String = tok_kind(tokens, p) + if str_eq(kk, "Eof") { + let going = false + } else { + if str_eq(kk, "LBrace") { + let depth = depth + 1 + let p = p + 1 + } else { + if str_eq(kk, "RBrace") { + if depth <= 0 { + let going = false + } else { + let depth = depth - 1 + let p = p + 1 + } + } else { + if depth == 0 { + if is_stmt_start_kind(kk) { + let going = false + } else { + let p = p + 1 + } + } else { + let p = p + 1 + } + } + } + } + } + } + p +} + +// scan_params_c — scan a parameter list `(name: Type, ...)` starting at +// position `pos` (which should point at LParen) and return the C parameter +// declaration string along with the new position. +// Returns { "c": String, "pos": Int }. +// This avoids allocating param-map objects during the pre-scan phase. +fn scan_params_c(tokens: [Any], pos: Int) -> Map { + let p: Int = expect(tokens, pos, "LParen") + let parts: [String] = native_list_empty() + let going: Bool = true + while going { + let kk: String = tok_kind(tokens, p) + if str_eq(kk, "RParen") { + let going = false + } else { + if str_eq(kk, "Eof") { + let going = false + } else { + let pname: String = tok_value(tokens, p) + let p = p + 1 + let p = expect(tokens, p, "Colon") + let p = skip_type(tokens, p) + let parts = native_list_append(parts, "el_val_t " + pname) + let k2: String = tok_kind(tokens, p) + if str_eq(k2, "Comma") { + let p = p + 1 + } + } + } + } + let p = expect(tokens, p, "RParen") + let c_str: String = str_join(parts, ", ") + // parts list fully consumed — release to free peak heap. + el_release(parts) + if str_eq(c_str, "") { let c_str = "void" } + { "c": c_str, "pos": p } +} + +// scan_fn_sigs — lightweight token-level pre-scan. 
+// +// Returns a list of descriptor maps (one per top-level item) without building +// full expression ASTs or param-map objects. Descriptors have these shapes: +// +// fn/extern_fn: { "kind": "fn"|"extern_fn", "name": String, +// "params_c": String, <- C param decl string, e.g. "el_val_t a, el_val_t b" +// "is_main": Bool } +// toplevel_let: { "kind": "toplevel_let", "name": String, "ltype": String } +// cgi_block: { "kind": "cgi_block", "name": String } +// service_block: { "kind": "service_block", "name": String } +// +// Import/TypeDef/EnumDef nodes are skipped (codegen treats them as no-ops). +// +// The scan allocates only small string values per entry, keeping peak RSS low. +fn scan_fn_sigs(tokens: [Any]) -> [Map] { + let total: Int = native_list_len(tokens) / 2 + let sigs: [Map] = native_list_empty() + let pos: Int = 0 + let going: Bool = true + while going { + if pos >= total { + let going = false + } else { + let k: String = tok_kind(tokens, pos) + if str_eq(k, "Eof") { + let going = false + } else { + // --- fn definition --- + if str_eq(k, "Fn") { + let p: Int = pos + 1 + let name: String = tok_value(tokens, p) + let p = p + 1 + let r = scan_params_c(tokens, p) + let params_c: String = r["c"] + let p = r["pos"] + // r result map fully consumed — release to free peak heap. 
+ el_release(r) + // skip return type + let k2: String = tok_kind(tokens, p) + if str_eq(k2, "Arrow") { + let p = p + 1 + let p = skip_type(tokens, p) + } + // skip body + let k3: String = tok_kind(tokens, p) + if str_eq(k3, "LBrace") { + let p = skip_to_rbrace(tokens, p) + } + let is_main: Bool = str_eq(name, "main") + let sigs = native_list_append(sigs, { + "kind": "fn", + "name": name, + "params_c": params_c, + "is_main": is_main + }) + let pos = p + } else { + // --- extern fn --- + if str_eq(k, "Extern") { + let p: Int = pos + 1 + let k2: String = tok_kind(tokens, p) + if str_eq(k2, "Fn") { + let p = p + 1 + let name: String = tok_value(tokens, p) + let p = p + 1 + let r = scan_params_c(tokens, p) + let params_c: String = r["c"] + let p = r["pos"] + // r result map fully consumed — release to free peak heap. + el_release(r) + let k3: String = tok_kind(tokens, p) + if str_eq(k3, "Arrow") { + let p = p + 1 + let p = skip_type(tokens, p) + } + let sigs = native_list_append(sigs, { + "kind": "extern_fn", + "name": name, + "params_c": params_c, + "is_main": false + }) + let pos = p + } else { + let pos = pos + 1 + } + } else { + // --- top-level let --- + if str_eq(k, "Let") { + let p: Int = pos + 1 + let name: String = tok_value(tokens, p) + let p = p + 1 + let ltype: String = "" + let k2: String = tok_kind(tokens, p) + if str_eq(k2, "Colon") { + let p = p + 1 + let kt: String = tok_kind(tokens, p) + if str_eq(kt, "Ident") { let ltype = tok_value(tokens, p) } + let p = skip_type(tokens, p) + } + let p = expect(tokens, p, "Eq") + let p = skip_expr_to_stmt_boundary(tokens, p) + let sigs = native_list_append(sigs, { + "kind": "toplevel_let", + "name": name, + "ltype": ltype + }) + let pos = p + } else { + // --- cgi block --- + if str_eq(k, "Cgi") { + let p: Int = pos + 1 + let name: String = tok_value(tokens, p) + let p = p + 1 + let k2: String = tok_kind(tokens, p) + if str_eq(k2, "LBrace") { + let p = skip_to_rbrace(tokens, p) + } + let sigs = 
native_list_append(sigs, { + "kind": "cgi_block", + "name": name + }) + let pos = p + } else { + // --- service block --- + if str_eq(k, "Service") { + let p: Int = pos + 1 + let name: String = tok_value(tokens, p) + let p = p + 1 + let k2: String = tok_kind(tokens, p) + if str_eq(k2, "LBrace") { + let p = skip_to_rbrace(tokens, p) + } + let sigs = native_list_append(sigs, { + "kind": "service_block", + "name": name + }) + let pos = p + } else { + // Import, Type, Enum, From, or any other token. + // Skip ahead to the next statement boundary. + let p: Int = pos + 1 + let p = skip_expr_to_stmt_boundary(tokens, p) + let pos = p + }}}}} + } + } + } + sigs +} diff --git a/lang/elb.el b/lang/elb.el index af878e7..dd9b298 100644 --- a/lang/elb.el +++ b/lang/elb.el @@ -225,6 +225,7 @@ fn compile_module(src_path: String, out_dir: String, elc_bin: String, dry_run: B let bname: String = basename_noext(src_path) let c_out: String = out_dir + "/" + bname + ".c" let elh_out: String = out_dir + "/" + bname + ".elh" + let err_tmp: String = "/tmp/elb-err-" + bname + ".txt" // Check if recompile needed if !file_is_newer(src_path, c_out) { @@ -234,18 +235,26 @@ fn compile_module(src_path: String, out_dir: String, elc_bin: String, dry_run: B return true } - // elc streams C to stdout (collect mode not yet implemented); use - // shell redirection so the output lands in the file, not the terminal. - let cmd: String = elc_bin + " --emit-header " + src_path + " > " + c_out + " 2>&1" + // elc streams C to stdout; redirect stderr to a temp file so we can + // surface the actual error message on failure instead of swallowing it. 
+ let cmd: String = elc_bin + " --emit-header " + src_path + " > " + c_out + " 2>" + err_tmp println(" compile " + src_path) if dry_run { return true } let ret: Int = exec_command(cmd) if ret != 0 { + // Surface the actual compiler error from stderr + let err_msg: String = str_trim(fs_read(err_tmp)) + if !str_eq(err_msg, "") { + println(err_msg) + } + // Remove partial output so a retry starts clean + exec_command("rm -f " + c_out + " " + err_tmp) println("elb: compile failed: " + src_path) return false } + exec_command("rm -f " + err_tmp) // Move the generated .elh (written next to the source by elc) into // out_dir so that #include "module.elh" lines in the generated .c @@ -320,6 +329,20 @@ fn main() -> Void { runtime_path = elc_dir + "/../el-compiler/runtime/el_runtime.c" } } + // If --runtime points to a directory, auto-locate el_runtime.c inside it. + // This lets both forms work: + // --runtime=/opt/el/el-compiler/runtime (directory form) + // --runtime=/opt/el/el-compiler/runtime/el_runtime.c (file form) + if !str_eq(runtime_path, "") { + let is_dir: String = str_trim(exec_capture("test -d " + runtime_path + " && echo dir || echo file")) + if str_eq(is_dir, "dir") { + let candidate: String = runtime_path + "/el_runtime.c" + let has_file: String = str_trim(exec_capture("test -f " + candidate + " && echo yes || echo no")) + if str_eq(has_file, "yes") { + let runtime_path = candidate + } + } + } if str_eq(runtime_path, "") { println("elb: cannot locate el_runtime.c - use --runtime=PATH") exit(1) diff --git a/lang/tests/native/test_compiler.el b/lang/tests/native/test_compiler.el new file mode 100644 index 0000000..6798114 --- /dev/null +++ b/lang/tests/native/test_compiler.el @@ -0,0 +1,727 @@ +// tests/native/test_compiler.el — comprehensive tests for the El compiler pipeline. +// +// Tests the lexer (lexer.el), parser (parser.el), and codegen (codegen.el) +// through the compile() entry point in compiler.el. 
+// +// Compiled and run via the native test harness: +// elc --test tests/native/test_compiler.el > /tmp/el_compiler_tests.c +// gcc -O2 -I runtime /tmp/el_compiler_tests.c runtime/el_runtime.c -lcurl -lpthread -lm -o /tmp/el_compiler_tests +// /tmp/el_compiler_tests + +import "../../el-compiler/src/lexer.el" +import "../../el-compiler/src/parser.el" +import "../../el-compiler/src/codegen.el" +import "../../el-compiler/src/codegen-js.el" +import "../../el-compiler/src/compiler.el" + +// ── Lexer helpers ───────────────────────────────────────────────────────────── + +fn tok_count(tokens: [Any]) -> Int { + native_list_len(tokens) / 2 +} + +// ── Codegen helper: capture compile() stdout to a string ───────────────────── + +fn compile_capture(src: String) -> String { + let tmp: String = "/tmp/el_compiler_test_" + int_to_str(time_now()) + ".c" + stdout_to_file(tmp) + compile(src) + stdout_restore() + fs_read(tmp) +} + +// ── Lexer tests ─────────────────────────────────────────────────────────────── + +test "lex-empty" { + let tokens: [Any] = lex("") + assert tok_count(tokens) == 1, "empty source yields only Eof" + assert tok_kind(tokens, 0) == "Eof", "single token is Eof" +} + +test "lex-whitespace-stripped" { + let tokens: [Any] = lex(" \t\n\r ") + assert tok_count(tokens) == 1, "whitespace-only yields only Eof" +} + +test "lex-comment-stripped" { + let tokens: [Any] = lex("// this is a comment\n// another") + assert tok_count(tokens) == 1, "comments stripped — only Eof" +} + +test "lex-int-literals" { + let tokens: [Any] = lex("0 1 42 100 999") + assert tok_count(tokens) == 6, "five int literals + Eof" + assert tok_kind(tokens, 0) == "Int", "first is Int" + assert tok_value(tokens, 0) == "0", "value is 0" + assert tok_kind(tokens, 2) == "Int", "third is Int" + assert tok_value(tokens, 2) == "42", "value is 42" + assert tok_kind(tokens, 4) == "Int", "fifth is Int" + assert tok_value(tokens, 4) == "999", "value is 999" +} + +test "lex-float-literals" { + let tokens: 
[Any] = lex("3.14 0.0 1.5") + assert tok_count(tokens) == 4, "three float literals + Eof" + assert tok_kind(tokens, 0) == "Float", "first is Float" + assert tok_value(tokens, 0) == "3.14", "value is 3.14" + assert tok_kind(tokens, 1) == "Float", "second is Float" + assert tok_value(tokens, 1) == "0.0", "value is 0.0" +} + +test "lex-string-literals" { + let tokens: [Any] = lex("\"hello\" \"world\" \"\"") + assert tok_count(tokens) == 4, "three string literals + Eof" + assert tok_kind(tokens, 0) == "Str", "first is Str" + assert tok_value(tokens, 0) == "hello", "value is hello" + assert tok_kind(tokens, 2) == "Str", "third is Str" + assert tok_value(tokens, 2) == "", "empty string value is empty" +} + +test "lex-string-escape-newline" { + let tokens: [Any] = lex("\"hello\\nworld\"") + assert tok_count(tokens) == 2, "one Str token + Eof" + assert tok_kind(tokens, 0) == "Str", "is Str" + let val: String = tok_value(tokens, 0) + assert str_contains(val, "hello"), "value contains hello" + assert str_contains(val, "world"), "value contains world" + assert str_len(val) == 11, "hello + newline + world = 11 chars" +} + +test "lex-string-escape-tab" { + let tokens: [Any] = lex("\"a\\tb\"") + assert tok_count(tokens) == 2, "one Str + Eof" + let val: String = tok_value(tokens, 0) + assert str_len(val) == 3, "a + tab + b = 3 chars" +} + +test "lex-string-escape-backslash" { + let tokens: [Any] = lex("\"a\\\\b\"") + assert tok_count(tokens) == 2, "one Str + Eof" + let val: String = tok_value(tokens, 0) + assert str_len(val) == 3, "a + backslash + b = 3 chars" +} + +test "lex-bool-literals" { + let tokens: [Any] = lex("true false") + assert tok_count(tokens) == 3, "two Bool tokens + Eof" + assert tok_kind(tokens, 0) == "Bool", "first is Bool" + assert tok_value(tokens, 0) == "true", "first is true" + assert tok_kind(tokens, 1) == "Bool", "second is Bool" + assert tok_value(tokens, 1) == "false", "second is false" +} + +test "lex-identifier" { + let tokens: [Any] = lex("foo bar 
_under _123") + assert tok_count(tokens) == 5, "four idents + Eof" + assert tok_kind(tokens, 0) == "Ident", "foo is Ident" + assert tok_value(tokens, 0) == "foo", "value is foo" + assert tok_kind(tokens, 2) == "Ident", "underscore ident recognized" + assert tok_value(tokens, 2) == "_under", "value is _under" +} + +test "lex-keywords" { + let tokens: [Any] = lex("let fn if else while for return import type enum match") + assert tok_count(tokens) == 12, "eleven keywords + Eof" + assert tok_kind(tokens, 0) == "Let", "let keyword" + assert tok_kind(tokens, 1) == "Fn", "fn keyword" + assert tok_kind(tokens, 2) == "If", "if keyword" + assert tok_kind(tokens, 3) == "Else", "else keyword" + assert tok_kind(tokens, 4) == "While", "while keyword" + assert tok_kind(tokens, 5) == "For", "for keyword" + assert tok_kind(tokens, 6) == "Return", "return keyword" + assert tok_kind(tokens, 7) == "Import", "import keyword" + assert tok_kind(tokens, 8) == "Type", "type keyword" + assert tok_kind(tokens, 9) == "Enum", "enum keyword" + assert tok_kind(tokens, 10) == "Match", "match keyword" +} + +test "lex-more-keywords" { + let tokens: [Any] = lex("extern break continue") + assert tok_count(tokens) == 4, "three keywords + Eof" + assert tok_kind(tokens, 0) == "Extern", "extern keyword" + assert tok_kind(tokens, 1) == "Break", "break keyword" + assert tok_kind(tokens, 2) == "Continue", "continue keyword" +} + +test "lex-keyword-values" { + let tokens: [Any] = lex("let fn return") + assert tok_value(tokens, 0) == "let", "let value is let" + assert tok_value(tokens, 1) == "fn", "fn value is fn" + assert tok_value(tokens, 2) == "return", "return value is return" +} + +test "lex-arithmetic-operators" { + let tokens: [Any] = lex("+ - * / %") + assert tok_count(tokens) == 6, "five ops + Eof" + assert tok_kind(tokens, 0) == "Plus", "plus" + assert tok_kind(tokens, 1) == "Minus", "minus" + assert tok_kind(tokens, 2) == "Star", "star" + assert tok_kind(tokens, 3) == "Slash", "slash" + assert 
tok_kind(tokens, 4) == "Percent", "percent" +} + +test "lex-comparison-operators" { + let tokens: [Any] = lex("== != < > <= >=") + assert tok_count(tokens) == 7, "six ops + Eof" + assert tok_kind(tokens, 0) == "EqEq", "eqeq" + assert tok_value(tokens, 0) == "==", "eqeq value" + assert tok_kind(tokens, 1) == "NotEq", "noteq" + assert tok_kind(tokens, 2) == "Lt", "lt" + assert tok_kind(tokens, 3) == "Gt", "gt" + assert tok_kind(tokens, 4) == "LtEq", "lteq" + assert tok_kind(tokens, 5) == "GtEq", "gteq" +} + +test "lex-logical-operators" { + let tokens: [Any] = lex("&& || !") + assert tok_count(tokens) == 4, "three logical ops + Eof" + assert tok_kind(tokens, 0) == "And", "and" + assert tok_value(tokens, 0) == "&&", "and value" + assert tok_kind(tokens, 1) == "Or", "or" + assert tok_kind(tokens, 2) == "Not", "not" +} + +test "lex-arrow-tokens" { + let tokens: [Any] = lex("-> =>") + assert tok_count(tokens) == 3, "arrow + fat-arrow + Eof" + assert tok_kind(tokens, 0) == "Arrow", "thin arrow" + assert tok_value(tokens, 0) == "->", "arrow value" + assert tok_kind(tokens, 1) == "FatArrow", "fat arrow" +} + +test "lex-delimiters" { + let tokens: [Any] = lex("( ) [ ] { } , : ; .") + assert tok_count(tokens) == 11, "ten delimiters + Eof" + assert tok_kind(tokens, 0) == "LParen", "lparen" + assert tok_kind(tokens, 1) == "RParen", "rparen" + assert tok_kind(tokens, 2) == "LBracket", "lbracket" + assert tok_kind(tokens, 3) == "RBracket", "rbracket" + assert tok_kind(tokens, 4) == "LBrace", "lbrace" + assert tok_kind(tokens, 5) == "RBrace", "rbrace" + assert tok_kind(tokens, 6) == "Comma", "comma" + assert tok_kind(tokens, 7) == "Colon", "colon" + assert tok_kind(tokens, 8) == "Semicolon", "semicolon" + assert tok_kind(tokens, 9) == "Dot", "dot" +} + +test "lex-double-colon" { + let tokens: [Any] = lex("::") + assert tok_count(tokens) == 2, "colons + Eof" + assert tok_kind(tokens, 0) == "ColonColon", "double colon" + assert tok_value(tokens, 0) == "::", "double colon value" +} + 
+test "lex-dot-dot" {
+    let tokens: [Any] = lex(".. ..=")
+    assert tok_count(tokens) == 3, "two range tokens + Eof"
+    assert tok_kind(tokens, 0) == "DotDot", "dotdot"
+    assert tok_kind(tokens, 1) == "DotDotEq", "dotdoteq"
+}
+
+test "lex-pipe-operators" {
+    let tokens: [Any] = lex("| || |>")
+    assert tok_count(tokens) == 4, "three pipe tokens + Eof"
+    assert tok_kind(tokens, 0) == "Pipe", "pipe"
+    assert tok_kind(tokens, 1) == "Or", "or"
+    assert tok_kind(tokens, 2) == "PipeOp", "pipe-op"
+}
+
+test "lex-at-and-question" {
+    let tokens: [Any] = lex("@ ?")
+    assert tok_count(tokens) == 3, "at + question + Eof"
+    assert tok_kind(tokens, 0) == "At", "at sign"
+    assert tok_kind(tokens, 1) == "QuestionMark", "question mark"
+}
+
+test "lex-eof-always-last" {
+    // Covers three inputs: a single identifier, a full let statement, and the empty source.
+    let t1: [Any] = lex("x")
+    let t2: [Any] = lex("let x = 1")
+    let t3: [Any] = lex("")
+    let n1: Int = tok_count(t1)
+    let n2: Int = tok_count(t2)
+    let n3: Int = tok_count(t3)
+    assert tok_kind(t1, n1 - 1) == "Eof", "eof last after single ident"
+    assert tok_kind(t2, n2 - 1) == "Eof", "eof last after let stmt"
+    assert tok_kind(t3, n3 - 1) == "Eof", "eof last after empty"
+}
+
+test "lex-string-with-spaces" {
+    let tokens: [Any] = lex("\"hello world\"")
+    assert tok_count(tokens) == 2, "string with space: 1 Str + Eof"
+    assert tok_value(tokens, 0) == "hello world", "internal space preserved"
+}
+
+test "lex-multiline-source" {
+    let src: String = "let x: Int = 1\nlet y: Int = 2\n"
+    let tokens: [Any] = lex(src)
+    assert tok_count(tokens) > 5, "multiline source produces multiple tokens"
+    assert tok_kind(tokens, 0) == "Let", "first token is Let"
+}
+
+test "lex-flat-stride-2-layout" {
+    // Verify the flat stride-2 layout: token i stores its kind at raw index 2*i and its value at raw index 2*i+1.
+    let tokens: [Any] = lex("fn foo")
+    // tokens[0] = "Fn", tokens[1] = "fn", tokens[2] = "Ident", tokens[3] = "foo", ...
+    let raw_len: Int = native_list_len(tokens)
+    assert raw_len == 6, "fn + foo + Eof = 3 tokens = 6 raw entries"
+    let kind0: String = native_list_get(tokens, 0)
+    let val0: String = native_list_get(tokens, 1)
+    let kind1: String = native_list_get(tokens, 2)
+    let val1: String = native_list_get(tokens, 3)
+    assert kind0 == "Fn", "raw[0] is Fn kind"
+    assert val0 == "fn", "raw[1] is fn value"
+    assert kind1 == "Ident", "raw[2] is Ident kind"
+    assert val1 == "foo", "raw[3] is foo value"
+}
+
+// ── Parser tests ──────────────────────────────────────────────────────────────
+
+// Lex and parse `src`, then return the "stmt" field of the first top-level
+// statement, or "" when parsing yields no statements.
+fn get_first_stmt_kind(src: String) -> String {
+    let tokens: [Any] = lex(src)
+    let stmts: [Map] = parse(tokens)
+    if native_list_len(stmts) == 0 { return "" }
+    let first: Map = native_list_get(stmts, 0)
+    first["stmt"]
+}
+
+// Lex and parse `src`, then return the first top-level statement node.
+// NOTE(review): no empty-list guard here, unlike get_first_stmt_kind; what
+// happens on an empty statement list depends on native_list_get — confirm.
+fn get_first_stmt(src: String) -> Map {
+    let tokens: [Any] = lex(src)
+    let stmts: [Map] = parse(tokens)
+    native_list_get(stmts, 0)
+}
+
+test "parse-let-stmt" {
+    assert get_first_stmt_kind("let x: Int = 5") == "Let", "let int stmt"
+    assert get_first_stmt_kind("let s: String = \"hi\"") == "Let", "let string stmt"
+    assert get_first_stmt_kind("let b: Bool = true") == "Let", "let bool stmt"
+    let stmt: Map = get_first_stmt("let x: Int = 42")
+    let name: String = stmt["name"]
+    assert name == "x", "let name is x"
+}
+
+test "parse-fn-decl" {
+    assert get_first_stmt_kind("fn foo() -> Void { }") == "FnDef", "fn declaration"
+    assert get_first_stmt_kind("fn bar(x: Int) -> Int { return x }") == "FnDef", "fn with param"
+    let stmt: Map = get_first_stmt("fn foo() -> Void { }")
+    let name: String = stmt["name"]
+    assert name == "foo", "fn name is foo"
+}
+
+test "parse-fn-params" {
+    let stmt: Map = get_first_stmt("fn bar(x: Int, y: String) -> Int { return 0 }")
+    let params = stmt["params"]
+    let n: Int = native_list_len(params)
+    assert n == 2, "fn has 2 params"
+    let p0: Map = native_list_get(params, 0)
+    let p1: Map = native_list_get(params, 1)
+    assert p0["name"] == "x", "first param name x"
+    assert p1["name"] == "y", "second param name y"
+}
+
+test "parse-return-stmt" {
+    let tokens: [Any] = lex("fn f() -> Int { return 42 }")
+    let stmts: [Map] = parse(tokens)
+    let fn_node: Map = native_list_get(stmts, 0)
+    let body = fn_node["body"]
+    let n: Int = native_list_len(body)
+    assert n > 0, "fn body non-empty"
+    let ret: Map = native_list_get(body, 0)
+    assert ret["stmt"] == "Return", "return stmt kind"
+}
+
+test "parse-if-stmt" {
+    // In El, `if` is an expression. Standalone `if` in a fn body is wrapped
+    // as Expr stmt with value.expr == "If".
+    let tokens: [Any] = lex("fn f() -> Int { if x > 0 { return 1 } return 0 }")
+    let stmts: [Map] = parse(tokens)
+    let fn_node: Map = native_list_get(stmts, 0)
+    let body = fn_node["body"]
+    let first_body: Map = native_list_get(body, 0)
+    assert first_body["stmt"] == "Expr", "if stmt in fn body is Expr wrapper"
+    let val = first_body["value"]
+    assert val["expr"] == "If", "Expr wraps If expression"
+}
+
+test "parse-if-else" {
+    let tokens: [Any] = lex("fn f() -> Int { if x > 0 { return 1 } else { return 0 } }")
+    let stmts: [Map] = parse(tokens)
+    let fn_node: Map = native_list_get(stmts, 0)
+    let body = fn_node["body"]
+    // if-else is also an Expr stmt wrapping an If expression
+    let expr_stmt: Map = native_list_get(body, 0)
+    assert expr_stmt["stmt"] == "Expr", "if-else is Expr stmt"
+    let if_node = expr_stmt["value"]
+    assert if_node["expr"] == "If", "Expr wraps If expression"
+    let has_else: Bool = if_node["has_else"]
+    assert has_else, "if-else has else branch"
+}
+
+test "parse-while-stmt" {
+    let tokens: [Any] = lex("fn f() -> Void { while i < 10 { i = i + 1 } }")
+    let stmts: [Map] = parse(tokens)
+    let fn_node: Map = native_list_get(stmts, 0)
+    let body = fn_node["body"]
+    let while_node: Map = native_list_get(body, 0)
+    assert while_node["stmt"] == "While", "while stmt kind"
+}
+
+test "parse-import-stmt" {
+    assert get_first_stmt_kind("import \"some/module.el\"") == "Import", "import stmt"
+}
+
+test "parse-extern-fn" {
+    assert get_first_stmt_kind("extern fn native_op(x: Int) -> Int") == "ExternFn", "extern fn"
+}
+
+test "parse-let-int-value" {
+    let stmt: Map = get_first_stmt("let n: Int = 99")
+    let val = stmt["value"]
+    let val_kind: String = val["expr"]
+    assert val_kind == "Int", "let value is Int expr"
+    // The literal's value is compared as a String ("99"), not as an Int.
+    let v: String = val["value"]
+    assert v == "99", "int literal value 99"
+}
+
+test "parse-let-string-value" {
+    let stmt: Map = get_first_stmt("let s: String = \"hello\"")
+    let val = stmt["value"]
+    let val_kind: String = val["expr"]
+    assert val_kind == "Str", "let value is Str expr"
+    let v: String = val["value"]
+    assert v == "hello", "string literal value hello"
+}
+
+test "parse-let-bool-value" {
+    let stmt: Map = get_first_stmt("let b: Bool = true")
+    let val = stmt["value"]
+    let val_kind: String = val["expr"]
+    assert val_kind == "Bool", "let value is Bool expr"
+}
+
+test "parse-binop-expr" {
+    let stmt: Map = get_first_stmt("let x: Int = 1 + 2")
+    let val = stmt["value"]
+    let val_kind: String = val["expr"]
+    assert val_kind == "BinOp", "let value is BinOp"
+    let op: String = val["op"]
+    assert op == "Plus", "binop is Plus"
+}
+
+test "parse-call-expr" {
+    let tokens: [Any] = lex("fn f() -> Void { println(\"hi\") }")
+    let stmts: [Map] = parse(tokens)
+    let fn_node: Map = native_list_get(stmts, 0)
+    let body = fn_node["body"]
+    let expr_stmt: Map = native_list_get(body, 0)
+    assert expr_stmt["stmt"] == "Expr", "call is Expr stmt"
+    let val = expr_stmt["value"]
+    let val_kind: String = val["expr"]
+    assert val_kind == "Call", "expr is Call"
+}
+
+test "parse-multiple-fns" {
+    let src: String = "fn a() -> Int { return 1 }\nfn b() -> Int { return 2 }"
+    let tokens: [Any] = lex(src)
+    let stmts: [Map] = parse(tokens)
+    assert native_list_len(stmts) == 2, "two fn declarations parsed"
+    let s0: Map = native_list_get(stmts, 0)
+    let s1: Map = native_list_get(stmts, 1)
+    assert s0["name"] == "a", "first fn name a"
+    assert s1["name"] == "b", "second fn name b"
+}
+
+test "parse-assign-stmt" {
+    let tokens: [Any] = lex("fn f() -> Void { x = 42 }")
+    let stmts: [Map] = parse(tokens)
+    let fn_node: Map = native_list_get(stmts, 0)
+    let body = fn_node["body"]
+    let a: Map = native_list_get(body, 0)
+    assert a["stmt"] == "Assign", "assign stmt kind"
+    assert a["name"] == "x", "assign target x"
+}
+
+test "parse-for-stmt" {
+    let tokens: [Any] = lex("fn f() -> Void { for x in items { println(x) } }")
+    let stmts: [Map] = parse(tokens)
+    let fn_node: Map = native_list_get(stmts, 0)
+    let body = fn_node["body"]
+    let for_node: Map = native_list_get(body, 0)
+    assert for_node["stmt"] == "For", "for stmt kind"
+    assert for_node["item"] == "x", "for item is x"
+}
+
+test "parse-unary-not" {
+    let tokens: [Any] = lex("fn f() -> Bool { return !x }")
+    let stmts: [Map] = parse(tokens)
+    let fn_node: Map = native_list_get(stmts, 0)
+    let body = fn_node["body"]
+    let ret: Map = native_list_get(body, 0)
+    let val = ret["value"]
+    assert val["expr"] == "Not", "unary not is Not expr"
+}
+
+test "parse-unary-neg" {
+    let tokens: [Any] = lex("fn f() -> Int { return -5 }")
+    let stmts: [Map] = parse(tokens)
+    let fn_node: Map = native_list_get(stmts, 0)
+    let body = fn_node["body"]
+    let ret: Map = native_list_get(body, 0)
+    let val = ret["value"]
+    assert val["expr"] == "Neg", "unary minus is Neg expr"
+}
+
+test "parse-array-literal" {
+    let tokens: [Any] = lex("fn f() -> [Int] { return [1, 2, 3] }")
+    let stmts: [Map] = parse(tokens)
+    let fn_node: Map = native_list_get(stmts, 0)
+    let body = fn_node["body"]
+    let ret: Map = native_list_get(body, 0)
+    let val = ret["value"]
+    assert val["expr"] == "Array", "array literal is Array expr"
+    let elems = val["elems"]
+    assert native_list_len(elems) == 3, "array has 3 elements"
+}
+
+test "parse-empty-array" {
+    let tokens: [Any] = lex("fn f() -> [Int] { return [] }")
+    let stmts: [Map] = parse(tokens)
+    let fn_node: Map = native_list_get(stmts, 0)
+    let body = fn_node["body"]
+    let ret: Map = native_list_get(body, 0)
+    let val = ret["value"]
+    assert val["expr"] == "Array", "empty array is Array expr"
+    let elems = val["elems"]
+    assert native_list_len(elems) == 0, "empty array has 0 elements"
+}
+
+test "parse-index-expr" {
+    let tokens: [Any] = lex("fn f() -> Any { return arr[0] }")
+    let stmts: [Map] = parse(tokens)
+    let fn_node: Map = native_list_get(stmts, 0)
+    let body = fn_node["body"]
+    let ret: Map = native_list_get(body, 0)
+    let val = ret["value"]
+    assert val["expr"] == "Index", "array index is Index expr"
+}
+
+// ── Codegen tests ─────────────────────────────────────────────────────────────
+
+test "codegen-includes" {
+    let out: String = compile_capture("fn main() -> Void { }")
+    assert str_contains(out, "#include"), "output has #include"
+    assert str_contains(out, "el_runtime.h"), "output includes el_runtime.h"
+}
+
+test "codegen-int-main" {
+    let out: String = compile_capture("fn main() -> Void { }")
+    assert str_contains(out, "int main("), "output has int main()"
+}
+
+test "codegen-runtime-init" {
+    let out: String = compile_capture("fn main() -> Void { }")
+    assert str_contains(out, "el_runtime_init_args("), "runtime init in main"
+}
+
+test "codegen-void-function-signature" {
+    // "void" here refers to the C parameter list `(void)`, not the return type.
+    let out: String = compile_capture("fn f() -> Int { return 0 }")
+    assert str_contains(out, "f(void)"), "no-param fn uses void signature"
+}
+
+test "codegen-function-with-params" {
+    let out: String = compile_capture("fn add(x: Int, y: Int) -> Int { return x + y }")
+    assert str_contains(out, "add("), "function add in output"
+    assert str_contains(out, "el_val_t x"), "param x in output"
+    assert str_contains(out, "el_val_t y"), "param y in output"
+}
+
+test "codegen-int-literal" {
+    let out: String = compile_capture("fn answer() -> Int { return 42 }")
+    assert str_contains(out, "42"), "integer literal 42 in output"
+    assert str_contains(out, "return"), "return statement in output"
+}
+
+test "codegen-string-literal" {
+    let out: String = compile_capture("fn greet() -> String { return \"hello\" }")
+    assert str_contains(out, "hello"), "string literal hello in output"
+}
+
+test "codegen-if-statement" {
+    let src: String = "fn check(x: Int) -> Int { if x > 0 { return 1 } return 0 }"
+    let out: String = compile_capture(src)
+    assert str_contains(out, "if ("), "if statement in C output"
+}
+
+test "codegen-if-else" {
+    let src: String = "fn check(x: Int) -> Int { if x > 0 { return 1 } else { return 0 } }"
+    let out: String = compile_capture(src)
+    assert str_contains(out, "if ("), "if in output"
+    assert str_contains(out, "} else {"), "else branch in output"
+}
+
+test "codegen-while-loop" {
+    let src: String = "fn f() -> Int { let i: Int = 0 while i < 10 { i = i + 1 } return i }"
+    let out: String = compile_capture(src)
+    assert str_contains(out, "while ("), "while loop in C output"
+}
+
+test "codegen-let-binding" {
+    let src: String = "fn f() -> Int { let n: Int = 5 return n }"
+    let out: String = compile_capture(src)
+    assert str_contains(out, "el_val_t n"), "let binding in output"
+}
+
+test "codegen-function-call" {
+    let src: String = "fn f() -> Void { println(\"hi\") }"
+    let out: String = compile_capture(src)
+    assert str_contains(out, "println("), "function call in output"
+}
+
+test "codegen-string-concat" {
+    let src: String = "fn f() -> String { let a: String = \"x\" let b: String = \"y\" return a + b }"
+    let out: String = compile_capture(src)
+    assert str_contains(out, "el_str_concat"), "string concat uses el_str_concat"
+}
+
+test "codegen-int-arithmetic" {
+    // Negative check: `+` on two Int params must not be lowered to el_str_concat.
+    let src: String = "fn f(x: Int, y: Int) -> Int { return x + y }"
+    let out: String = compile_capture(src)
+    assert !str_contains(out, "el_str_concat(x"), "int add does not use el_str_concat"
+}
+
+test "codegen-comparison" {
+    let src: String = "fn f(x: Int) -> Bool { return x > 0 }"
+    let out: String = compile_capture(src)
+    // NOTE(review): ">" is a one-character needle; it would also match an
+    // angle-bracket #include or a "->" anywhere in the output — confirm this
+    // assert cannot pass vacuously.
+    assert str_contains(out, ">"), "comparison in output"
+}
+
+test "codegen-string-equality" {
+    let src: String = "fn f(s: String) -> Bool { return s == \"hello\" }"
+    let out: String = compile_capture(src)
+    assert str_contains(out, "str_eq("), "string equality uses str_eq"
+}
+
+test "codegen-logical-and" {
+    let src: String = "fn f(a: Bool, b: Bool) -> Bool { return a && b }"
+    let out: String = compile_capture(src)
+    assert str_contains(out, "&&"), "logical and in output"
+}
+
+test "codegen-logical-or" {
+    let src: String = "fn f(a: Bool, b: Bool) -> Bool { return a || b }"
+    let out: String = compile_capture(src)
+    assert str_contains(out, "||"), "logical or in output"
+}
+
+test "codegen-unary-not" {
+    let src: String = "fn f(b: Bool) -> Bool { return !b }"
+    let out: String = compile_capture(src)
+    // NOTE(review): "!" is a one-character needle; it would also match e.g. a
+    // "!=" elsewhere in the output — confirm this assert cannot pass vacuously.
+    assert str_contains(out, "!"), "unary not in output"
+}
+
+test "codegen-string-escape-in-c" {
+    // The needles are the two-character sequences backslash-n and backslash-t,
+    // i.e. the escapes must appear in escaped form in the generated C text.
+    let src: String = "fn msg() -> String { return \"hello\\nworld\\t!\" }"
+    let out: String = compile_capture(src)
+    assert str_contains(out, "\\n"), "newline escape in C output"
+    assert str_contains(out, "\\t"), "tab escape in C output"
+}
+
+test "codegen-many-functions" {
+    // Multiple functions — exercises streaming loop + per-function arena scoping
+    let src: String = "fn a() -> Int { return 1 }\nfn b() -> Int { return 2 }\nfn c() -> Int { return 3 }\nfn d() -> Int { return 4 }\nfn e() -> Int { return 5 }"
+    let out: String = compile_capture(src)
+    assert str_contains(out, "el_val_t a("), "function a in output"
+    assert str_contains(out, "el_val_t b("), "function b in output"
+    assert str_contains(out, "el_val_t c("), "function c in output"
+    assert str_contains(out, "el_val_t d("), "function d in output"
+    assert str_contains(out, "el_val_t e("), "function e in output"
+}
+
+test "codegen-deep-expression" {
+    // Deeply nested arithmetic — exercises recursive cg_expr + per-statement arena
+    let src: String = "fn deep() -> Int { return 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 }"
+    let out: String = compile_capture(src)
+    assert str_contains(out, "return"), "deep expr: return present"
+    assert str_contains(out, "8"), "deep expr: literal 8 present"
+}
+
+test "codegen-forward-declarations" {
+    // b calls a before a is defined, so the output should forward-declare a.
+    // NOTE(review): the asserts below only check that both signatures appear
+    // somewhere in the output; they do not check declaration order.
+    let src: String = "fn b() -> Int { return a() }\nfn a() -> Int { return 1 }"
+    let out: String = compile_capture(src)
+    assert str_contains(out, "el_val_t a("), "function a in output"
+    assert str_contains(out, "el_val_t b("), "function b in output"
+}
+
+test "codegen-for-loop" {
+    let src: String = "fn f() -> Void { let items: [Int] = native_list_empty() for item in items { println(item) } }"
+    let out: String = compile_capture(src)
+    assert str_contains(out, "for ("), "for loop in C output"
+    assert str_contains(out, "el_list_get("), "for loop uses el_list_get"
+}
+
+test "codegen-extern-fn" {
+    let src: String = "extern fn my_native(x: Int) -> Int\nfn use_it() -> Int { return my_native(1) }"
+    let out: String = compile_capture(src)
+    assert str_contains(out, "my_native("), "extern fn referenced in output"
+}
+
+test "codegen-nested-calls" {
+    let src: String = "fn f() -> String { return str_concat(int_to_str(42), \" ok\") }"
+    let out: String = compile_capture(src)
+    assert str_contains(out, "str_concat"), "nested calls: str_concat in output"
+    assert str_contains(out, "int_to_str"), "nested calls: int_to_str in output"
+}
+
+// ── Self-host / smoke tests ───────────────────────────────────────────────────
+
+test "compiler-minimal-program" {
+    let src: String = "fn main() -> Void { println(\"ok\") }"
+    let out: String = compile_capture(src)
+    assert str_contains(out, "#include"), "has #include"
+    assert str_contains(out, "int main("), "has int main()"
+    assert str_contains(out, "println("), "calls println"
+    assert str_contains(out, "el_runtime.h"), "links el_runtime.h"
+}
+
+test "compiler-pure-library" {
+    // No fn main = library mode: codegen_streaming returns before emitting main()
+    let src: String = "fn helper(x: Int) -> Int { return x + 1 }"
+    let out: String = compile_capture(src)
+    assert !str_contains(out, "int main("), "library: no int main"
+    assert str_contains(out, "#include"), "library: has includes"
+    assert str_contains(out, "helper("), "library: helper function present"
+}
+
+test "compiler-multiple-fns-with-main" {
+    let src: String = "fn greet(name: String) -> String { return \"Hello \" + name }\nfn main() -> Void { println(greet(\"world\")) }"
+    let out: String = compile_capture(src)
+    assert str_contains(out, "greet("), "greet in output"
+    assert str_contains(out, "int main("), "main in output"
+    assert str_contains(out, "println("), "println in output"
+}
+
+test "compiler-let-in-main" {
+    let src: String = "fn main() -> Void { let x: Int = 42 println(int_to_str(x)) }"
+    let out: String = compile_capture(src)
+    assert str_contains(out, "el_val_t x"), "let binding x in output"
+    assert str_contains(out, "42"), "literal 42 in output"
+}
+
+test "compiler-string-concat-chain" {
+    let src: String = "fn f() -> String { let a: String = \"x\" let b: String = \"y\" let c: String = \"z\" return a + b + c }"
+    let out: String = compile_capture(src)
+    assert str_contains(out, "el_str_concat"), "string chain uses el_str_concat"
+}
+
+test "compiler-negative-literal" {
+    let src: String = "fn f() -> Int { return -42 }"
+    let out: String = compile_capture(src)
+    // Only the digits are checked here; the sign is not asserted.
+    assert str_contains(out, "42"), "negative literal value in output"
+}
+
+test "compiler-stdint-include" {
+    // The generated C should include stdint.h for int64_t
+    let src: String = "fn f() -> Int { return 0 }"
+    let out: String = compile_capture(src)
+    assert str_contains(out, "stdint.h"), "output includes stdint.h"
+}