Compare commits

..

6 Commits

Author SHA1 Message Date
will.anderson f5dcca0386 build: update dist/platform/elc with OOM fix and memory guard
El SDK CI - dev / build-and-test (pull_request) Successful in 4m16s
Rebuilt from fix/elc-oom-checkout: scan_fn_sigs_el() --emit-header path
+ el_mem_check() guard. Verified on checkout.el: all 3 sigs in .elh,
clean exit under normal load, exit(1) on memory limit exceeded.
2026-05-08 08:23:07 -05:00
will.anderson 53e0b99d5f fix(elc): add el_mem_check() memory guard — abort before OS OOM-kill
Add el_mem_check() to el_runtime.c: reads ELC_MAX_MEM_MB (default 512),
checks RSS via getrusage (macOS bytes / Linux KB normalised to MB), prints
a clear diagnostic to stderr and exits(1) if exceeded.

Wire it into two places:
- compiler.el: upfront check at --emit-header entry point
- codegen.el: per-function check in the streaming loop after each
  el_arena_pop, so runaway growth is caught at the earliest function
  boundary rather than after the machine is already dying.
2026-05-08 08:21:38 -05:00
will.anderson 5f9cad5908 fix(elc): eliminate OOM in --emit-header by using token-level signature scan
The --emit-header path previously called parse() which builds the entire
program AST in memory before writing the .elh file. For checkout.el (~491
lines with HTML template trees and deep BinOp string-concat chains), this
exhausted memory before the header could be written.

Fix: replace parse() + emit_header() with scan_fn_sigs_el() +
emit_header_from_sigs(). The new path tokenises the source once, then
walks the flat token list skipping over function bodies entirely — peak
memory is O(tokens) instead of O(whole-program AST).

New functions in parser.el:
- scan_type_el: reads a type annotation and returns its El source string
- scan_params_el: reads (name: Type, ...) and returns El params string
- scan_fn_sigs_el: token-level scan that collects El-style fn signatures
  without building any expression AST nodes

New function in compiler.el:
- emit_header_from_sigs: writes .elh from scan_fn_sigs_el output

Self-hosting check: elc compiled with new elc, diff of outputs is
identical (zero difference).

Smoke test: elc --emit-header checkout.el produces correct three-entry
.elh (previously truncated at two entries due to mid-parse OOM).
2026-05-08 08:20:13 -05:00
will.anderson 00629b39c4 Merge pull request 'fix(parser): str_join separator '' not ' ' — CSS selectors were emitting spaces' (#45) from fix/css-str-join-separator into dev
El SDK CI - dev / build-and-test (push) Failing after 12m6s
2026-05-07 23:00:19 +00:00
will.anderson ca1e4d57b8 Merge pull request 'ci: add three-tier ci-base rebuild (dev/stage)' (#44) from fix/html-template-if-style-script into dev
El SDK CI - dev / build-and-test (push) Has been cancelled
2026-05-07 23:00:13 +00:00
will.anderson 81a1a624f1 add three-tier ci-base rebuild (dev/stage) to CI workflows
El SDK CI - dev / build-and-test (pull_request) Successful in 3m49s
2026-05-07 15:51:24 -05:00
8 changed files with 397 additions and 5 deletions
+47
View File
@@ -252,4 +252,51 @@ jobs:
--source=el-compiler/runtime/el_runtime.js
echo "Published El SDK version=${VERSION} to foundation-dev"
# Keep key alive for the ci-base rebuild step below
# (deleted in that step after docker push)
- name: Rebuild ci-base with fresh El SDK (dev)
  # Patches ci-base:dev in-place: pulls the existing image (which has all
  # system deps — Node, Go, gcloud, Docker CLI, etc.) and overlays the freshly
  # built El SDK on top. Keeps the full ci-base rebuild fast and incremental.
  if: github.event_name == 'push'
  env:
    GCP_SA_KEY: ${{ secrets.GCP_SA_KEY }}
  run: |
    set -euo pipefail
    CI_BASE="us-central1-docker.pkg.dev/neuron-785695/neuron-ci/ci-base"
    SHA="${GITHUB_SHA:0:8}"
    KEY_FILE=/tmp/gcp-key.json
    PATCH_DOCKERFILE=/tmp/Dockerfile.ci-base-patch
    # Delete the service-account key on EVERY exit path. With `set -e` a failed
    # gcloud/docker command aborts the script immediately, so a trailing
    # `rm -f` alone would leave the key on the runner after a failure.
    trap 'rm -f "${KEY_FILE}" "${PATCH_DOCKERFILE}"' EXIT
    # Recreate the key file owner-readable only (a stale copy may already exist).
    rm -f "${KEY_FILE}"
    (umask 077 && printf '%s\n' "${GCP_SA_KEY}" > "${KEY_FILE}")
    gcloud auth activate-service-account --key-file="${KEY_FILE}"
    gcloud config set project neuron-785695
    gcloud auth configure-docker us-central1-docker.pkg.dev --quiet
    # Pull existing ci-base:dev (system deps stay cached in the base layer).
    # If :dev does not exist yet, fall back to :latest and remember which tag
    # was actually pulled so the build below uses an image that is present.
    BASE_IMAGE="${CI_BASE}:dev"
    if ! docker pull "${BASE_IMAGE}"; then
      BASE_IMAGE="${CI_BASE}:latest"
      docker pull "${BASE_IMAGE}"
    fi
    # Inline Dockerfile — only replaces the El SDK layer
    cat > "${PATCH_DOCKERFILE}" << 'EOF'
    ARG BASE
    FROM ${BASE}
    COPY dist/platform/elc /opt/el/dist/platform/elc
    COPY dist/bin/elb /opt/el/dist/bin/elb
    COPY el-compiler/runtime/el_runtime.c /opt/el/el-compiler/runtime/el_runtime.c
    COPY el-compiler/runtime/el_runtime.h /opt/el/el-compiler/runtime/el_runtime.h
    COPY el-compiler/runtime/el_runtime.js /opt/el/el-compiler/runtime/el_runtime.js
    RUN chmod +x /opt/el/dist/platform/elc /opt/el/dist/bin/elb
    EOF
    docker build \
      --build-arg BASE="${BASE_IMAGE}" \
      --build-arg BUILDKIT_INLINE_CACHE=1 \
      -f "${PATCH_DOCKERFILE}" \
      -t "${CI_BASE}:dev" \
      -t "${CI_BASE}:dev-${SHA}" \
      .
    docker push "${CI_BASE}:dev"
    docker push "${CI_BASE}:dev-${SHA}"
    echo "ci-base rebuilt: ${CI_BASE}:dev (${SHA})"
+47
View File
@@ -246,4 +246,51 @@ jobs:
--source=el-compiler/runtime/el_runtime.h
echo "Published El SDK version=${VERSION} to foundation-stage"
# Keep key alive for the ci-base rebuild step below
# (deleted in that step after docker push)
- name: Rebuild ci-base with fresh El SDK (stage)
  # Patches ci-base:stage in-place: pulls the existing image (which has all
  # system deps — Node, Go, gcloud, Docker CLI, etc.) and overlays the freshly
  # built El SDK on top. Keeps the full ci-base rebuild fast and incremental.
  if: github.event_name == 'push'
  env:
    GCP_SA_KEY: ${{ secrets.GCP_SA_KEY }}
  run: |
    set -euo pipefail
    CI_BASE="us-central1-docker.pkg.dev/neuron-785695/neuron-ci/ci-base"
    SHA="${GITHUB_SHA:0:8}"
    KEY_FILE=/tmp/gcp-key.json
    PATCH_DOCKERFILE=/tmp/Dockerfile.ci-base-patch
    # Delete the service-account key on EVERY exit path. With `set -e` a failed
    # gcloud/docker command aborts the script immediately, so a trailing
    # `rm -f` alone would leave the key on the runner after a failure.
    trap 'rm -f "${KEY_FILE}" "${PATCH_DOCKERFILE}"' EXIT
    # Recreate the key file owner-readable only (a stale copy may already exist).
    rm -f "${KEY_FILE}"
    (umask 077 && printf '%s\n' "${GCP_SA_KEY}" > "${KEY_FILE}")
    gcloud auth activate-service-account --key-file="${KEY_FILE}"
    gcloud config set project neuron-785695
    gcloud auth configure-docker us-central1-docker.pkg.dev --quiet
    # Pull existing ci-base:stage (system deps stay cached in the base layer).
    # If :stage does not exist yet, fall back to :latest and remember which tag
    # was actually pulled so the build below uses an image that is present.
    BASE_IMAGE="${CI_BASE}:stage"
    if ! docker pull "${BASE_IMAGE}"; then
      BASE_IMAGE="${CI_BASE}:latest"
      docker pull "${BASE_IMAGE}"
    fi
    # Inline Dockerfile — only replaces the El SDK layer
    cat > "${PATCH_DOCKERFILE}" << 'EOF'
    ARG BASE
    FROM ${BASE}
    COPY dist/platform/elc /opt/el/dist/platform/elc
    COPY dist/bin/elb /opt/el/dist/bin/elb
    COPY el-compiler/runtime/el_runtime.c /opt/el/el-compiler/runtime/el_runtime.c
    COPY el-compiler/runtime/el_runtime.h /opt/el/el-compiler/runtime/el_runtime.h
    COPY el-compiler/runtime/el_runtime.js /opt/el/el-compiler/runtime/el_runtime.js
    RUN chmod +x /opt/el/dist/platform/elc /opt/el/dist/bin/elb
    EOF
    docker build \
      --build-arg BASE="${BASE_IMAGE}" \
      --build-arg BUILDKIT_INLINE_CACHE=1 \
      -f "${PATCH_DOCKERFILE}" \
      -t "${CI_BASE}:stage" \
      -t "${CI_BASE}:stage-${SHA}" \
      .
    docker push "${CI_BASE}:stage"
    docker push "${CI_BASE}:stage-${SHA}"
    echo "ci-base rebuilt: ${CI_BASE}:stage (${SHA})"
BIN
View File
Binary file not shown.
+45
View File
@@ -42,6 +42,7 @@
#include <dirent.h>
#include <errno.h>
#include <pthread.h>
#include <sys/resource.h> /* getrusage — memory guard */
#ifdef HAVE_CURL
#include <curl/curl.h>
#endif
@@ -5674,6 +5675,50 @@ el_val_t getpid_now(void) {
return (el_val_t)getpid();
}
/* el_mem_check — self-terminating memory guard for long-running compiler runs.
 *
 * Call this periodically (e.g. after each function compiled) to detect runaway
 * memory growth before the OS OOM-killer fires. Reads the limit from the env
 * var ELC_MAX_MEM_MB (default 512 MB). If the process's peak resident set size
 * has reached the limit (>=), prints a diagnostic to stderr and exits with
 * code 1 so the caller (elb or a CI script) can handle the failure gracefully
 * instead of having the whole machine go down.
 *
 * ELC_MAX_MEM_MB must begin with a positive base-10 integer that fits in a
 * long. Anything else (unset, empty, non-numeric, zero, negative, or out of
 * range) leaves the 512 MB default in place.
 *
 * The measurement is ru_maxrss, i.e. the high-water mark of resident memory
 * for this process, not its current usage. That is sufficient for a guard
 * that terminates the process the first time the mark crosses the limit.
 *
 * Platform notes:
 *   macOS  ru_maxrss is in bytes.
 *   Linux  ru_maxrss is in kilobytes.
 *   We normalise to MB before comparing.
 *
 * If getrusage() fails the check is skipped and 0 is returned.
 *
 * Returns 0 always (the only non-return path is the exit() branch).
 */
el_val_t el_mem_check(void) {
    /* Read limit from env; default 512 MB. */
    long limit_mb = 512;
    const char *env_val = getenv("ELC_MAX_MEM_MB");
    if (env_val && *env_val) {
        /* strtol reports overflow via errno (atol's behaviour on overflow is
         * undefined). Preserve the caller's errno around the parse. */
        int saved_errno = errno;
        char *end = NULL;
        errno = 0;
        long v = strtol(env_val, &end, 10);
        if (errno == 0 && end != env_val && v > 0) limit_mb = v;
        errno = saved_errno;
    }

    struct rusage ru;
    if (getrusage(RUSAGE_SELF, &ru) != 0) return 0; /* can't read — skip check */

    long rss_mb;
#if defined(__APPLE__) || defined(__MACH__)
    /* macOS: ru_maxrss is bytes */
    rss_mb = (long)(ru.ru_maxrss / (1024L * 1024L));
#else
    /* Linux: ru_maxrss is kilobytes */
    rss_mb = (long)(ru.ru_maxrss / 1024L);
#endif

    if (rss_mb >= limit_mb) {
        /* Report both numbers: the limit alone does not tell the user how far
         * over the process went or whether raising ELC_MAX_MEM_MB would help. */
        fprintf(stderr,
                "elc: memory limit exceeded (peak RSS %ldMB, limit %ldMB), aborting\n",
                rss_mb, limit_mb);
        exit(1);
    }
    return 0;
}
/* ── args() — command-line argument access ──────────────────────────────────
* Compiled El programs call args() to get a list of CLI arguments.
* Call el_runtime_init_args(argc, argv) at the start of C main() to populate.
+6
View File
@@ -531,6 +531,12 @@ el_val_t parse_int(el_val_t s, el_val_t default_val);
el_val_t exit_program(el_val_t code);
el_val_t getpid_now(void);
/* Self-terminating memory guard. Reads ELC_MAX_MEM_MB (default 512) and
 * exits with code 1 if resident memory, taken from ru_maxrss as reported by
 * getrusage(RUSAGE_SELF) and normalised to MB, is at or above the limit.
 * Call periodically during long compilation loops (e.g. after each function
 * is compiled).
 * Returns 0 when memory is within bounds, and also when getrusage() fails
 * (the check is skipped in that case). */
el_val_t el_mem_check(void);
/* ── CGI identity ─────────────────────────────────────────────────────────────
* Called at the start of main() in CGI programs (those with a `cgi {}` block).
* Records the program's DHARMA identity before any other code executes. */
+1
View File
@@ -3730,6 +3730,7 @@ fn codegen_streaming(tokens: [Any], sigs: [Map<String, Any>], source: String) ->
cg_fn(stmt)
el_release(stmt)
el_arena_pop(fn_arena_mark)
el_mem_check()
}
} else {
if is_top_level_decl(stmt) {
+38 -5
View File
@@ -287,6 +287,9 @@ fn type_node_to_el(t: Map<String, Any>) -> String {
// emit_header — write a .elh file from parsed statements.
// Scans for FnDef nodes and emits 'extern fn' declarations.
// NOTE: This function requires the full AST. Prefer emit_header_from_sigs
// for the --emit-header path — it works from a token-level scan without
// building expression ASTs, avoiding OOM on large files.
fn emit_header(stmts: [Map<String, Any>], hdr_path: String) -> Void {
let n: Int = native_list_len(stmts)
let i = 0
@@ -325,6 +328,32 @@ fn emit_header(stmts: [Map<String, Any>], hdr_path: String) -> Void {
let ok: Bool = fs_write(hdr_path, content)
}
// emit_header_from_sigs — write a .elh file from pre-scanned El signatures.
// Uses the output of scan_fn_sigs_el() — no full AST required.
// Peak memory is O(tokens) rather than O(whole-program AST), which prevents
// OOM on large files with HTML template bodies or deep BinOp chains.
//
// sigs:     descriptors carrying "kind", "name", "params_el" and "ret_el".
//           Only entries whose kind is "fn" produce a line; every other kind
//           is ignored.
// hdr_path: destination path handed to fs_write.
//
// Each emitted line has the form `extern fn NAME(PARAMS) -> RET`; an empty
// "ret_el" is written as "Any". The result of fs_write is not inspected.
fn emit_header_from_sigs(sigs: [Map<String, Any>], hdr_path: String) -> Void {
    let n: Int = native_list_len(sigs)
    let i: Int = 0
    let parts: [String] = native_list_empty()
    let parts = native_list_append(parts, "// auto-generated by elc --emit-header — do not edit\n")
    while i < n {
        let sig = native_list_get(sigs, i)
        let kind: String = sig["kind"]
        if str_eq(kind, "fn") {
            let name: String = sig["name"]
            let params_el: String = sig["params_el"]
            let ret_el: String = sig["ret_el"]
            if str_eq(ret_el, "") { let ret_el = "Any" }
            let line: String = "extern fn " + name + "(" + params_el + ") -> " + ret_el
            let parts = native_list_append(parts, line + "\n")
        }
        let i = i + 1
    }
    let content: String = str_join(parts, "")
    // The piece list is no longer needed once joined. Release it here, the
    // same way scan_type_el and scan_params_el release theirs after str_join,
    // so it does not stay alive while the rest of the compile runs.
    el_release(parts)
    let ok: Bool = fs_write(hdr_path, content)
}
// Import resolution
//
// elc supports two forms of import:
@@ -536,16 +565,20 @@ fn main() -> Void {
let src_path: String = native_list_get(positional, 0)
// When --emit-header is requested, parse the source file directly
// (without inlining imports) and write out a .elh file alongside the .c.
// When --emit-header is requested, lex the source file and do a
// token-level signature scan (no full AST) to write a .elh file.
// This avoids OOM on large files with HTML template bodies or deep
// BinOp chains (e.g. checkout.el): parse() builds O(whole-program AST)
// while scan_fn_sigs_el keeps peak memory at O(tokens).
if do_emit_header {
el_mem_check()
let raw_source: String = fs_read(src_path)
let hdr_tokens: [Any] = lex(raw_source)
let hdr_stmts: [Map<String, Any>] = parse(hdr_tokens)
let hdr_sigs: [Map<String, Any>] = scan_fn_sigs_el(hdr_tokens)
el_release(hdr_tokens)
let hdr_path: String = str_slice(src_path, 0, str_len(src_path) - 3) + ".elh"
emit_header(hdr_stmts, hdr_path)
el_release(hdr_stmts)
emit_header_from_sigs(hdr_sigs, hdr_path)
el_release(hdr_sigs)
}
let source: String = resolve_imports(src_path)
+213
View File
@@ -2065,6 +2065,219 @@ fn skip_expr_to_stmt_boundary(tokens: [Any], pos: Int) -> Int {
p
}
// scan_type_el — read a type annotation starting at pos and return its El
// source representation as a string, plus the new position.
// Returns { "el": String, "pos": Int }.
// Handles: Ident, [Type], Map<K,V>, Type?, Type<T,...> (same shapes as skip_type).
//
// An optional marker that follows a generic type (e.g. Map<String, Any>?) is
// kept in the returned string, matching what the plain `Type?` case returns.
// Any token that starts none of the shapes above yields "Any" and advances
// one position.
fn scan_type_el(tokens: [Any], pos: Int) -> Map<String, Any> {
    let k: String = tok_kind(tokens, pos)

    // Array type: [Type]
    if str_eq(k, "LBracket") {
        let p: Int = pos + 1
        let inner = scan_type_el(tokens, p)
        let inner_str: String = inner["el"]
        let p = inner["pos"]
        el_release(inner)
        let p = expect(tokens, p, "RBracket")
        return { "el": "[" + inner_str + "]", "pos": p }
    }

    // Named type (possibly generic or optional)
    if str_eq(k, "Ident") {
        let name: String = tok_value(tokens, pos)
        let p: Int = pos + 1
        let k2: String = tok_kind(tokens, p)
        if str_eq(k2, "Lt") {
            // Generic params: collect until matching >
            let p = p + 1
            let depth: Int = 1
            let parts: [String] = native_list_empty()
            let parts = native_list_append(parts, name + "<")
            let running: Bool = true
            while running {
                let kk: String = tok_kind(tokens, p)
                if str_eq(kk, "Eof") {
                    // Unterminated generic list: stop without consuming Eof.
                    let running = false
                } else {
                    if str_eq(kk, "Lt") {
                        let depth = depth + 1
                        let parts = native_list_append(parts, "<")
                        let p = p + 1
                    } else {
                        if str_eq(kk, "Gt") {
                            // Every > is emitted; the one that brings depth
                            // back to zero also ends the scan.
                            let depth = depth - 1
                            let p = p + 1
                            let parts = native_list_append(parts, ">")
                            if depth <= 0 {
                                let running = false
                            }
                        } else {
                            if str_eq(kk, "Comma") {
                                let parts = native_list_append(parts, ", ")
                                let p = p + 1
                            } else {
                                let parts = native_list_append(parts, tok_value(tokens, p))
                                let p = p + 1
                            }
                        }
                    }
                }
            }
            // Optional marker after the generic type. It must be emitted as
            // well as consumed, otherwise `T<...>?` would be written to the
            // header as the non-optional `T<...>`.
            let k3: String = tok_kind(tokens, p)
            if str_eq(k3, "QuestionMark") {
                let parts = native_list_append(parts, "?")
                let p = p + 1
            }
            let result: String = str_join(parts, "")
            el_release(parts)
            return { "el": result, "pos": p }
        }
        // Optional marker
        if str_eq(k2, "QuestionMark") {
            return { "el": name + "?", "pos": p + 1 }
        }
        return { "el": name, "pos": p }
    }

    // Fallback: unknown token, treat as Any
    { "el": "Any", "pos": pos + 1 }
}
// scan_params_el — scan a parameter list `(name: Type, ...)` whose opening
// LParen is expected at `pos`, and rebuild it as El source text.
// Returns { "el": String, "pos": Int } where "el" is the comma-separated
// declaration list (e.g. "a: String, b: Int") and "pos" is the position
// returned by the final expect(..., "RParen").
// Used by scan_fn_sigs_el for --emit-header without building full AST.
fn scan_params_el(tokens: [Any], pos: Int) -> Map<String, Any> {
    let cur: Int = expect(tokens, pos, "LParen")
    let decls: [String] = native_list_empty()
    let scanning: Bool = true
    while scanning {
        let kind: String = tok_kind(tokens, cur)
        // The list ends at the closing paren; Eof ends the scan as well.
        let at_end: Bool = str_eq(kind, "RParen")
        if str_eq(kind, "Eof") { let at_end = true }
        if at_end {
            let scanning = false
        } else {
            // One parameter: <name> ":" <type>
            let param_name: String = tok_value(tokens, cur)
            let cur = expect(tokens, cur + 1, "Colon")
            let scanned = scan_type_el(tokens, cur)
            let param_type: String = scanned["el"]
            let cur = scanned["pos"]
            el_release(scanned)
            let decls = native_list_append(decls, param_name + ": " + param_type)
            // Step over the separator when there is one.
            let after: String = tok_kind(tokens, cur)
            if str_eq(after, "Comma") {
                let cur = cur + 1
            }
        }
    }
    let cur = expect(tokens, cur, "RParen")
    let joined: String = str_join(decls, ", ")
    el_release(decls)
    { "el": joined, "pos": cur }
}
// scan_fn_sigs_el — lightweight token-level pre-scan for --emit-header.
//
// Like scan_fn_sigs but captures El-style type strings instead of C types.
// Only records fn/extern_fn entries (header generation ignores lets/blocks).
//
// Descriptor shape:
//   { "kind": "fn"|"extern_fn", "name": String,
//     "params_el": String,   <- El param list, e.g. "a: String, b: Int"
//     "ret_el": String }     <- El return type, e.g. "String" or "Void";
//                               "Any" when the signature has no `->` clause
//
// A `fn` named "main" is scanned (so its body is skipped) but not recorded.
// Returns the list of descriptors in source order.
//
// Peak memory: O(tokens) with no expression AST allocation.
fn scan_fn_sigs_el(tokens: [Any]) -> [Map<String, Any>] {
// Number of positions to scan is half the list length.
// NOTE(review): presumably the list holds two entries per token — confirm against lex().
let total: Int = native_list_len(tokens) / 2
let sigs: [Map<String, Any>] = native_list_empty()
let pos: Int = 0
let going: Bool = true
while going {
// Stop when the position runs off the end or an Eof token is reached.
if pos >= total {
let going = false
} else {
let k: String = tok_kind(tokens, pos)
if str_eq(k, "Eof") {
let going = false
} else {
// --- fn definition ---
if str_eq(k, "Fn") {
// The token right after `fn` is taken as the function name.
let p: Int = pos + 1
let name: String = tok_value(tokens, p)
let p = p + 1
let pr = scan_params_el(tokens, p)
let params_el: String = pr["el"]
let p = pr["pos"]
el_release(pr)
// read return type
let ret_el: String = "Any"
let k2: String = tok_kind(tokens, p)
if str_eq(k2, "Arrow") {
let p = p + 1
let tr = scan_type_el(tokens, p)
let ret_el = tr["el"]
let p = tr["pos"]
el_release(tr)
}
// skip body
let k3: String = tok_kind(tokens, p)
if str_eq(k3, "LBrace") {
let p = skip_to_rbrace(tokens, p)
}
// main is never written to the signature list.
if !str_eq(name, "main") {
let sigs = native_list_append(sigs, {
"kind": "fn",
"name": name,
"params_el": params_el,
"ret_el": ret_el
})
}
let pos = p
} else {
// --- extern fn ---
if str_eq(k, "Extern") {
let p: Int = pos + 1
let k2: String = tok_kind(tokens, p)
if str_eq(k2, "Fn") {
// Same name / params / return-type scan as the fn branch, but no body is skipped.
let p = p + 1
let name: String = tok_value(tokens, p)
let p = p + 1
let pr = scan_params_el(tokens, p)
let params_el: String = pr["el"]
let p = pr["pos"]
el_release(pr)
let ret_el: String = "Any"
let k3: String = tok_kind(tokens, p)
if str_eq(k3, "Arrow") {
let p = p + 1
let tr = scan_type_el(tokens, p)
let ret_el = tr["el"]
let p = tr["pos"]
el_release(tr)
}
let sigs = native_list_append(sigs, {
"kind": "extern_fn",
"name": name,
"params_el": params_el,
"ret_el": ret_el
})
let pos = p
} else {
// `extern` not followed by `fn`: step over the `extern` token only.
let pos = pos + 1
}
} else {
// Let, Cgi, Service, Import, Type, Enum, From — skip to boundary.
let p: Int = pos + 1
let p = skip_expr_to_stmt_boundary(tokens, p)
let pos = p
}}}
}
}
sigs
}
// scan_params_c scan a parameter list `(name: Type, ...)` starting at
// position `pos` (which should point at LParen) and return the C parameter
// declaration string along with the new position.